My Liblinear code
train.c
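LIBLINEAR's train.c, extended with three ordinal-regression solvers (-s 8 SVOR, -s 9 NPSVOR, -s 10 SVMOP) and a handful of new options: -o and -t set NPSVOR's C1 and C2, -g picks the C1/C2 grid-search mode, -r is the ADMM rho for SVOR, -l is the exponent w of the |k-y|^w loss weights, and -m selects the SVOR/NPSVOR algorithm variant. Hypothetical invocations, assuming the tool is built as ./train:

./train -s 9 -c 1 -o 2 -t 0.5 data.txt data.model
./train -s 8 -l 1 -v 5 data.txt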
#include <stdio.h>#include <math.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include <errno.h>#include "linear.h"#include <time.h>//modification#define Malloc(type,n) (type *)malloc((n)*sizeof(type))#define INF HUGE_VALvoid print_null(const char *s) {}void exit_with_help(){ printf( "Usage: train [options] training_set_file [model_file]\n" "options:\n" "-s type : set type of solver (default 1)\n" " for multi-class classification\n" " 0 -- L2-regularized logistic regression (primal)\n" " 1 -- L2-regularized L2-loss support vector classification (dual)\n" " 2 -- L2-regularized L2-loss support vector classification (primal)\n" " 3 -- L2-regularized L1-loss support vector classification (dual)\n" " 4 -- support vector classification by Crammer and Singer\n" " 5 -- L1-regularized L2-loss support vector classification\n" " 6 -- L1-regularized logistic regression\n" " 7 -- L2-regularized logistic regression (dual)\n" " 8 -- L2-regularized SVOR\n"//my modification " 9 -- L2-regularized NPSVOR\n" " 10 -- L2-regularized SVMOP\n" " for regression\n" " 11 -- L2-regularized L2-loss support vector regression (primal)\n" " 12 -- L2-regularized L2-loss support vector regression (dual)\n" " 13 -- L2-regularized L1-loss support vector regression (dual)\n" "-c cost : set the parameter C (default 1)\n" "-o cost one: set the parameter C1 for NPSVOR(defult = C)\n" "-t cost two: set the parameter C2 for NPSVOR(defult = C)\n" "-g grid seach C1 and C2: g=1 find C1=C2, g=2 find C1!=C2\n" "-r rho: parameter of ADMM for SVOR\n" "-l weight loss for SVOR,|k-y|^w, w in {0,1,2}\n" "-m select the algorithm for svor\n" "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n" "-e epsilon : set tolerance of termination criterion\n" " -s 0 and 2\n" " |f‘(w)|_2 <= eps*min(pos,neg)/l*|f‘(w0)|_2,\n" " where f is the primal function and pos/neg are # of\n" " positive/negative data (default 0.01)\n" " -s 11\n" " |f‘(w)|_2 <= eps*|f‘(w0)|_2 (default 0.001)\n" " -s 1, 3, 4, and 7\n" " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" " -s 5 and 6\n" " |f‘(w)|_1 <= eps*min(pos,neg)/l*|f‘(w0)|_1,\n" " where f is the primal function (default 0.01)\n" " -s 12 and 13\n" " |f‘(alpha)|_1 <= eps |f‘(alpha0)|,\n" " where f is the dual function (default 0.1)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" "-v n: n-fold cross validation mode\n" "-C : find parameter C (only for -s 0 and 2)\n" "-q : quiet mode (no outputs)\n" ); exit(1);}void exit_input_error(int line_num){ fprintf(stderr,"Wrong input format at line %d\n", line_num); exit(1);}static char *line = NULL;static int max_line_len;static char* readline(FILE *input){ int len; if(fgets(line,max_line_len,input) == NULL) return NULL; while(strrchr(line,‘\n‘) == NULL) { max_line_len *= 2; line = (char *) realloc(line,max_line_len); len = (int) strlen(line); if(fgets(line+len,max_line_len-len,input) == NULL) break; } return line;}void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);void read_problem(const char *filename);void do_cross_validation(const char *filename);void do_find_parameter_C(const char *filename);void do_find_parameter_npsvor(const char *filename);struct feature_node *x_space;struct parameter param;struct problem prob;struct model* model_;int flag_cross_validation;int flag_find_C;int flag_C_specified;int flag_solver_specified;int nr_fold;double 
bias;int main(int argc, char **argv){ char input_file_name[1024]; char model_file_name[1024]; const char *error_msg; parse_command_line(argc, argv, input_file_name, model_file_name); read_problem(input_file_name); error_msg = check_parameter(&prob,¶m); if(error_msg) { fprintf(stderr,"ERROR: %s\n",error_msg); exit(1); } if (flag_find_C) { if(param.solver_type == L2R_NPSVOR && param.g ==2) do_find_parameter_npsvor(input_file_name); else do_find_parameter_C(input_file_name); } else if(flag_cross_validation) { do_cross_validation(input_file_name); } else { //fprintf(stderr,"Test:param.solver_type %d\n",param.solver_type); model_=train(&prob, ¶m); // for(i=0;i<sub_prob.n;i++) // info("w%.6f\n",model_->w[i]); // for(i=0;i<nr_class-1;i++) // info("b %.6f\n",model_->b[i]); if(save_model(model_file_name, model_)) { fprintf(stderr,"can‘t save model to file %s\n",model_file_name); exit(1); } free_and_destroy_model(&model_); } destroy_param(¶m); free(prob.y); free(prob.x); free(x_space); free(line); return 0;}static const char *solver_type_table[]={ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", "L2R_SVOR", "L2R_NPSVOR", "L2R_SVMOP",//My modification "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL};void do_find_parameter_C(const char *input_file_name){ double start_C, best_C, best_acc_rate, best_mae_rate; double max_C = 1024; clock_t start, stop; double cvtime; if (flag_C_specified) start_C = param.C; else start_C = -1.0; printf("Doing parameter search with %d-fold cross validation.\n", nr_fold); start=clock(); find_parameter_C(&prob, ¶m, nr_fold, start_C, max_C, &best_C, &best_acc_rate,&best_mae_rate); stop=clock(); cvtime = (double)(stop-start)/CLOCKS_PER_SEC; printf("Best C = %g CV acc = %g CV mae = %g%%\n", best_C, best_acc_rate, best_mae_rate); param.C = best_C; if(param.solver_type == L2R_NPSVOR) { param.C1 = param.C; param.C2 = param.C; } // printf("%g\n",log(param.C)/log(2.0)); do_cross_validation(input_file_name); char file_name[1024]; sprintf(file_name,"%s_%s%g_out.log",solver_type_table[param.solver_type],input_file_name,prob.bias); FILE * out = fopen(file_name,"a+t"); fprintf(out, "CVTime = %g\n",cvtime); if (NULL != out) fclose(out) ; // clear the old file. 
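/* Assumption, based on upstream LIBLINEAR's -C option: find_parameter_C
   warm-starts the solver while stepping C upward (doubling each step) from
   start_C (-1 lets the library pick a starting value) up to max_C = 1024.
   This modified version additionally reports CV MAE next to CV accuracy,
   so the ordinal solvers can be tuned on either metric. */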
}void do_find_parameter_npsvor(const char *input_file_name){ double start_C, best_C1, best_C2, best_acc_rate, best_mae_rate; double max_C = 1024; if (flag_C_specified) start_C = param.C; else start_C = -1.0; printf("Doing parameter search with %d-fold cross validation.\n", nr_fold); find_parameter_npsvor(&prob, ¶m, nr_fold, start_C, max_C, &best_C1, &best_C2, &best_acc_rate,&best_mae_rate); printf("Best C1 = %g Best C2 = %g CV acc = %g CV mae = %g%%\n", best_C1, best_C2, best_acc_rate, best_mae_rate);}void do_cross_validation(const char *input_file_name){ int i; int total_correct = 0; double total_error = 0; double *target = Malloc(double, prob.l); double cvtime; char file_name[1024]; sprintf(file_name,"%s_%s%g_cv.log",solver_type_table[param.solver_type],input_file_name,prob.bias); FILE * log = fopen(file_name,"w"); sprintf(file_name,"%s_%s%g_out.log",solver_type_table[param.solver_type],input_file_name,prob.bias); FILE * out = fopen(file_name,"w"); clock_t start, stop; start=clock(); cross_validation(&prob,¶m,nr_fold,target); stop=clock(); cvtime = (double)(stop-start)/CLOCKS_PER_SEC; for(i=0;i<prob.l;i++) {if(target[i] == prob.y[i]) ++total_correct; total_error += fabs(target[i]-prob.y[i]); } //printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l); printf(" %g %g\n",1.0*total_correct/prob.l,total_error/prob.l); fprintf(out, "parameter C = %g, Accuracy= %g MAE= %g Time = %g ", param.C, 1.0*total_correct/prob.l,total_error/prob.l, cvtime); for(i=0;i<prob.l;i++) fprintf(log,"%g %g\n", prob.y[i], target[i]); if (NULL != log) fclose(log) ; // clear the old file. free(target); if (NULL != out) fclose(out) ; // clear the old file. }void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name){ int i; void (*print_func)(const char*) = NULL; // default printing to stdout // default values param.solver_type = L2R_L2LOSS_SVC_DUAL; param.C = 1; param.eps = INF; // see setting below param.p = 0.1; param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; param.init_sol = NULL;/** -------------------my modification---------------------------*/ param.rho = 1; param.wl = 0; param.svor = 1; param.npsvor = 1; param.C1 = 1; param.C2 = 1; param.g = 1;/** -------------------my modification---------------------------*/ flag_cross_validation = 0; flag_C_specified = 0; flag_solver_specified = 0; flag_find_C = 0; bias = -1; // parse options for(i=1;i<argc;i++) { if(argv[i][0] != ‘-‘) break; if(++i>=argc) exit_with_help(); switch(argv[i-1][1]) {/** -------------------my modification---------------------------*/ case ‘r‘://my parameter s; param.rho = atof(argv[i]); break; case ‘l‘://my parameter t; param.wl = atof(argv[i]); break; case ‘m‘://my parameter t; param.svor = atof(argv[i]); param.npsvor = atof(argv[i]); break; case ‘o‘: // one param.C1 = atof(argv[i]); break; case ‘t‘: // two param.C2 = atof(argv[i]); break; case ‘g‘: // two param.g = atof(argv[i]); break; /** -------------------my modification---------------------------*/ case ‘s‘: param.solver_type = atoi(argv[i]); flag_solver_specified = 1; break; case ‘c‘: param.C = atof(argv[i]); param.C1 = param.C; // param.C2 = param.C; // flag_C_specified = 1; break; case ‘p‘: param.p = atof(argv[i]); break; case ‘e‘: param.eps = atof(argv[i]); break; case ‘B‘: bias = atof(argv[i]); break; case ‘w‘: ++param.nr_weight; param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); 
param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); param.weight[param.nr_weight-1] = atof(argv[i]); break; case ‘v‘: flag_cross_validation = 1; nr_fold = atoi(argv[i]); if(nr_fold < 2) { fprintf(stderr,"n-fold cross validation: n must >= 2\n"); exit_with_help(); } break; case ‘q‘: print_func = &print_null; i--; break; case ‘C‘: flag_find_C = 1; i--; break; default: fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); exit_with_help(); break; } } set_print_string_function(print_func); // determine filenames if(i>=argc) exit_with_help(); strcpy(input_file_name, argv[i]); if(i<argc-1) strcpy(model_file_name,argv[i+1]); else { char *p = strrchr(argv[i],‘/‘); if(p==NULL) p = argv[i]; else ++p; sprintf(model_file_name,"%s.model",p); } // default solver for parameter selection is L2R_L2LOSS_SVC if(flag_find_C) { if(!flag_cross_validation) nr_fold = 5; if(!flag_solver_specified) { fprintf(stderr, "Solver not specified. Using -s 2\n"); param.solver_type = L2R_L2LOSS_SVC; } // else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC) // { // fprintf(stderr, "Warm-start parameter search only available for -s 0 and -s 2\n"); // exit_with_help(); // } } if(param.eps == INF) { switch(param.solver_type) {/** -------------------my modification---------------------------*/ case L2R_SVOR: param.eps = 0.1; param.rho = 1; break; case L2R_NPSVOR: param.eps = 0.1; break;/** -------------------my modification---------------------------*/ case L2R_LR: case L2R_L2LOSS_SVC: param.eps = 0.01; break; case L2R_L2LOSS_SVR: param.eps = 0.001; break; case L2R_L2LOSS_SVC_DUAL: case L2R_L1LOSS_SVC_DUAL://in this case, eps is set to 0.1 case L2R_SVMOP://in this case, eps is set to 0.1 //param.eps = 0.1; //break; case MCSVM_CS: case L2R_LR_DUAL: param.eps = 0.1; break; case L1R_L2LOSS_SVC: case L1R_LR: param.eps = 0.01; break; case L2R_L1LOSS_SVR_DUAL: case L2R_L2LOSS_SVR_DUAL: param.eps = 0.1; break; } }}// read in a problem (in libsvm format)void read_problem(const char *filename){ int max_index, inst_max_index, i; size_t elements, j; FILE *fp = fopen(filename,"r"); char *endptr; char *idx, *val, *label; if(fp == NULL) { fprintf(stderr,"can‘t open input file %s\n",filename); exit(1); } prob.l = 0; elements = 0; max_line_len = 1024; line = Malloc(char,max_line_len); while(readline(fp)!=NULL) { char *p = strtok(line," \t"); // label // features while(1) { p = strtok(NULL," \t"); if(p == NULL || *p == ‘\n‘) // check ‘\n‘ as ‘ ‘ may be after the last feature break; elements++; } elements++; // for bias term prob.l++; } rewind(fp); prob.bias=bias; prob.y = Malloc(double,prob.l); prob.x = Malloc(struct feature_node *,prob.l); x_space = Malloc(struct feature_node,elements+prob.l); max_index = 0; j=0; for(i=0;i<prob.l;i++) { inst_max_index = 0; // strtol gives 0 if wrong format readline(fp); prob.x[i] = &x_space[j]; label = strtok(line," \t\n"); if(label == NULL) // empty line exit_input_error(i+1); prob.y[i] = strtod(label,&endptr); if(endptr == label || *endptr != ‘\0‘) exit_input_error(i+1); while(1) { idx = strtok(NULL,":"); val = strtok(NULL," \t"); if(val == NULL) break; errno = 0; x_space[j].index = (int) strtol(idx,&endptr,10); if(endptr == idx || errno != 0 || *endptr != ‘\0‘ || x_space[j].index <= inst_max_index) exit_input_error(i+1); else inst_max_index = x_space[j].index; errno = 0; x_space[j].value = http://www.mamicode.com/strtod(val,&endptr);>
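linear.cpp
The solver library: LIBLINEAR's linear.cpp with the SVOR, NPSVOR and SVMOP coordinate-descent routines added.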
#include <math.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <locale.h>#include <time.h>//modification#include "linear.h"#include "tron.h"typedef signed char schar;template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }#ifndef mintemplate <class T> static inline T min(T x,T y) { return (x<y)?x:y; }#endif#ifndef maxtemplate <class T> static inline T max(T x,T y) { return (x>y)?x:y; }#endiftemplate <class S, class T> static inline void clone(T*& dst, S* src, int n){ dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n);}#define Malloc(type,n) (type *)malloc((n)*sizeof(type))#define INF HUGE_VALstatic void print_string_stdout(const char *s){ fputs(s,stdout); fflush(stdout);}static void print_null(const char *s) {}static void (*liblinear_print_string) (const char *) = &print_string_stdout;#if 1static void info(const char *fmt,...){ char buf[BUFSIZ]; va_list ap; va_start(ap,fmt); vsprintf(buf,fmt,ap); va_end(ap); (*liblinear_print_string)(buf);}#elsestatic void info(const char *fmt,...) {}#endifclass sparse_operator{public: static double nrm2_sq(const feature_node *x)//norm2-square { double ret = 0; while(x->index != -1) { ret += x->value*x->value; x++; } return (ret); } static double dot(const double *s, const feature_node *x) { double ret = 0; while(x->index != -1) { ret += s[x->index-1]*x->value; x++; } return (ret); } static void axpy(const double a, const feature_node *x, double *y)//a*x+y { while(x->index != -1) { y[x->index-1] += a*x->value; x++; } }};class l2r_lr_fun: public function{public: l2r_lr_fun(const problem *prob, double *C); ~l2r_lr_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void);private: void Xv(double *v, double *Xv); void XTv(double *v, double *XTv); double *C; double *z; double *D; const problem *prob;};l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C){ int l=prob->l; this->prob = prob; z = new double[l]; D = new double[l]; this->C = C;}l2r_lr_fun::~l2r_lr_fun(){ delete[] z; delete[] D;}double l2r_lr_fun::fun(double *w){ int i; double f=0; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); Xv(w, z); for(i=0;i<w_size;i++) f += w[i]*w[i]; f /= 2.0; for(i=0;i<l;i++) { double yz = y[i]*z[i]; if (yz >= 0) f += C[i]*log(1 + exp(-yz)); else f += C[i]*(-yz+log(1 + exp(yz))); } return(f);}void l2r_lr_fun::grad(double *w, double *g){ int i; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); for(i=0;i<l;i++) { z[i] = 1/(1 + exp(-y[i]*z[i])); D[i] = z[i]*(1-z[i]); z[i] = C[i]*(z[i]-1)*y[i]; } XTv(z, g); for(i=0;i<w_size;i++) g[i] = w[i] + g[i];}int l2r_lr_fun::get_nr_variable(void){ return prob->n;}void l2r_lr_fun::Hv(double *s, double *Hs){ int i; int l=prob->l; int w_size=get_nr_variable(); double *wa = new double[l]; feature_node **x=prob->x; for(i=0;i<w_size;i++) Hs[i] = 0; for(i=0;i<l;i++) { feature_node * const xi=x[i]; wa[i] = sparse_operator::dot(s, xi); wa[i] = C[i]*D[i]*wa[i]; sparse_operator::axpy(wa[i], xi, Hs); } for(i=0;i<w_size;i++) Hs[i] = s[i] + Hs[i]; delete[] wa;}void l2r_lr_fun::Xv(double *v, double *Xv){ int i; int l=prob->l; feature_node **x=prob->x; for(i=0;i<l;i++) Xv[i]=sparse_operator::dot(v, x[i]);}void l2r_lr_fun::XTv(double *v, double *XTv){ int i; int l=prob->l; int w_size=get_nr_variable(); feature_node **x=prob->x; for(i=0;i<w_size;i++) XTv[i]=0; for(i=0;i<l;i++) sparse_operator::axpy(v[i], x[i], XTv);}class l2r_l2_svc_fun: public function{public: l2r_l2_svc_fun(const problem *prob, 
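/* Worked example (illustration only): sparse_operator acts on LIBLINEAR's
   sparse rows, i.e. feature_node arrays terminated by index == -1, with
   1-based feature indices. For x = [(1, 0.5), (3, 2.0)]:
     nrm2_sq(x)    = 0.5*0.5 + 2.0*2.0 = 4.25
     dot(w, x)     = w[0]*0.5 + w[2]*2.0
     axpy(a, x, w) does w[0] += a*0.5; w[2] += a*2.0 */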
double *C); ~l2r_l2_svc_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void);protected: void Xv(double *v, double *Xv); void subXTv(double *v, double *XTv); double *C; double *z; double *D; int *I; int sizeI; const problem *prob;};l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double *C){ int l=prob->l; this->prob = prob; z = new double[l]; D = new double[l]; I = new int[l]; this->C = C;}l2r_l2_svc_fun::~l2r_l2_svc_fun(){ delete[] z; delete[] D; delete[] I;}double l2r_l2_svc_fun::fun(double *w){ int i; double f=0; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); Xv(w, z); for(i=0;i<w_size;i++) f += w[i]*w[i]; f /= 2.0; for(i=0;i<l;i++) { z[i] = y[i]*z[i]; double d = 1-z[i]; if (d > 0) f += C[i]*d*d; } return(f);}void l2r_l2_svc_fun::grad(double *w, double *g){ int i; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); sizeI = 0; for (i=0;i<l;i++) if (z[i] < 1) { z[sizeI] = C[i]*y[i]*(z[i]-1); I[sizeI] = i; sizeI++; } subXTv(z, g); for(i=0;i<w_size;i++) g[i] = w[i] + 2*g[i];}int l2r_l2_svc_fun::get_nr_variable(void){ return prob->n;}void l2r_l2_svc_fun::Hv(double *s, double *Hs){ int i; int w_size=get_nr_variable(); double *wa = new double[sizeI]; feature_node **x=prob->x; for(i=0;i<w_size;i++) Hs[i]=0; for(i=0;i<sizeI;i++) { feature_node * const xi=x[I[i]]; wa[i] = sparse_operator::dot(s, xi); wa[i] = C[I[i]]*wa[i]; sparse_operator::axpy(wa[i], xi, Hs); } for(i=0;i<w_size;i++) Hs[i] = s[i] + 2*Hs[i]; delete[] wa;}void l2r_l2_svc_fun::Xv(double *v, double *Xv){ int i; int l=prob->l; feature_node **x=prob->x; for(i=0;i<l;i++) Xv[i]=sparse_operator::dot(v, x[i]);}void l2r_l2_svc_fun::subXTv(double *v, double *XTv){ int i; int w_size=get_nr_variable(); feature_node **x=prob->x; for(i=0;i<w_size;i++) XTv[i]=0; for(i=0;i<sizeI;i++) sparse_operator::axpy(v[i], x[I[i]], XTv);}class l2r_l2_svr_fun: public l2r_l2_svc_fun{public: l2r_l2_svr_fun(const problem *prob, double *C, double p); double fun(double *w); void grad(double *w, double *g);private: double p;};l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *C, double p): l2r_l2_svc_fun(prob, C){ this->p = p;}double l2r_l2_svr_fun::fun(double *w){ int i; double f=0; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); double d; Xv(w, z); for(i=0;i<w_size;i++) f += w[i]*w[i]; f /= 2; for(i=0;i<l;i++) { d = z[i] - y[i]; if(d < -p) f += C[i]*(d+p)*(d+p); else if(d > p) f += C[i]*(d-p)*(d-p); } return(f);}void l2r_l2_svr_fun::grad(double *w, double *g){ int i; double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); double d; sizeI = 0; for(i=0;i<l;i++) { d = z[i] - y[i]; // generate index set I if(d < -p) { z[sizeI] = C[i]*(d+p); I[sizeI] = i; sizeI++; } else if(d > p) { z[sizeI] = C[i]*(d-p); I[sizeI] = i; sizeI++; } } subXTv(z, g); for(i=0;i<w_size;i++) g[i] = w[i] + 2*g[i];}// A coordinate descent algorithm for // multi-class support vector machines by Crammer and Singer//// min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i// s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i// // where e^m_i = 0 if y_i = m,// e^m_i = 1 if y_i != m,// C^m_i = C if m = y_i, // C^m_i = 0 if m != y_i, // and w_m(\alpha) = \sum_i \alpha^m_i x_i //// Given: // x, y, C// eps is the stopping tolerance//// solution will be put in w//// See Appendix of LIBLINEAR paper, Fan et al. 
(2008)#define GETI(i) ((int) prob->y[i])// To support weights for instances, use GETI(i) (i)class Solver_MCSVM_CS{ public: Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000); ~Solver_MCSVM_CS(); void Solve(double *w); private: void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new); bool be_shrunk(int i, int m, int yi, double alpha_i, double minG); double *B, *C, *G; int w_size, l; int nr_class; int max_iter; double eps; const problem *prob;};Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter){ this->w_size = prob->n; this->l = prob->l; this->nr_class = nr_class; this->eps = eps; this->max_iter = max_iter; this->prob = prob; this->B = new double[nr_class]; this->G = new double[nr_class]; this->C = weighted_C;}Solver_MCSVM_CS::~Solver_MCSVM_CS(){ delete[] B; delete[] G;}int compare_double(const void *a, const void *b){ if(*(double *)a > *(double *)b) return -1; if(*(double *)a < *(double *)b) return 1; return 0;}void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new){ int r; double *D; clone(D, B, active_i); if(yi < active_i) D[yi] += A_i*C_yi; qsort(D, active_i, sizeof(double), compare_double); double beta = D[0] - A_i*C_yi; for(r=1;r<active_i && beta<r*D[r];r++) beta += D[r]; beta /= r; for(r=0;r<active_i;r++) { if(r == yi) alpha_new[r] = min(C_yi, (beta-B[r])/A_i); else alpha_new[r] = min((double)0, (beta - B[r])/A_i); } delete[] D;}bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG){ double bound = 0; if(m == yi) bound = C[GETI(i)]; if(alpha_i == bound && G[m] < minG) return true; return false;}void Solver_MCSVM_CS::Solve(double *w){ int i, m, s; int iter = 0; double *alpha = new double[l*nr_class]; double *alpha_new = new double[nr_class]; int *index = new int[l]; double *QD = new double[l]; int *d_ind = new int[nr_class]; double *d_val = new double[nr_class]; int *alpha_index = new int[nr_class*l]; int *y_index = new int[l]; int active_size = l; int *active_size_i = new int[l]; double eps_shrink = max(10.0*eps, 1.0); // stopping tolerance for shrinking bool start_from_all = true; // Initial alpha can be set here. 
Note that // sum_m alpha[i*nr_class+m] = 0, for all i=1,...,l-1 // alpha[i*nr_class+m] <= C[GETI(i)] if prob->y[i] == m // alpha[i*nr_class+m] <= 0 if prob->y[i] != m // If initial alpha isn‘t zero, uncomment the for loop below to initialize w for(i=0;i<l*nr_class;i++) alpha[i] = 0; for(i=0;i<w_size*nr_class;i++) w[i] = 0; for(i=0;i<l;i++) { for(m=0;m<nr_class;m++) alpha_index[i*nr_class+m] = m; feature_node *xi = prob->x[i]; QD[i] = 0; while(xi->index != -1) { double val = xi->value; QD[i] += val*val; // Uncomment the for loop if initial alpha isn‘t zero // for(m=0; m<nr_class; m++) // w[(xi->index-1)*nr_class+m] += alpha[i*nr_class+m]*val; xi++; } active_size_i[i] = nr_class; y_index[i] = (int)prob->y[i]; index[i] = i; } while(iter < max_iter) { double stopping = -INF; for(i=0;i<active_size;i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0;s<active_size;s++) { i = index[s]; double Ai = QD[i]; double *alpha_i = &alpha[i*nr_class]; int *alpha_index_i = &alpha_index[i*nr_class]; if(Ai > 0) { for(m=0;m<active_size_i[i];m++) G[m] = 1; if(y_index[i] < active_size_i[i]) G[y_index[i]] = 0; feature_node *xi = prob->x[i]; while(xi->index!= -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;m<active_size_i[i];m++) G[m] += w_i[alpha_index_i[m]]*(xi->value); xi++; } double minG = INF; double maxG = -INF; for(m=0;m<active_size_i[i];m++) { if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG) minG = G[m]; if(G[m] > maxG) maxG = G[m]; } if(y_index[i] < active_size_i[i]) if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG) minG = G[y_index[i]]; for(m=0;m<active_size_i[i];m++) { if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG)) { active_size_i[i]--; while(active_size_i[i]>m) { if(!be_shrunk(i, active_size_i[i], y_index[i], alpha_i[alpha_index_i[active_size_i[i]]], minG)) { swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); swap(G[m], G[active_size_i[i]]); if(y_index[i] == active_size_i[i]) y_index[i] = m; else if(y_index[i] == m) y_index[i] = active_size_i[i]; break; } active_size_i[i]--; } } } if(active_size_i[i] <= 1) { active_size--; swap(index[s], index[active_size]); s--; continue; } if(maxG-minG <= 1e-12) continue; else stopping = max(maxG - minG, stopping); for(m=0;m<active_size_i[i];m++) B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ; solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new); int nz_d = 0; for(m=0;m<active_size_i[i];m++) { double d = alpha_new[m] - alpha_i[alpha_index_i[m]]; alpha_i[alpha_index_i[m]] = alpha_new[m]; if(fabs(d) >= 1e-12) { d_ind[nz_d] = alpha_index_i[m]; d_val[nz_d] = d; nz_d++; } } xi = prob->x[i]; while(xi->index != -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;m<nz_d;m++) w_i[d_ind[m]] += d_val[m]*xi->value; xi++; } } } iter++; if(iter % 10 == 0) { info("."); } if(stopping < eps_shrink) { if(stopping < eps && start_from_all == true) break; else { active_size = l; for(i=0;i<l;i++) active_size_i[i] = nr_class; info("*"); eps_shrink = max(eps_shrink/2, eps); start_from_all = true; } } else start_from_all = false; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0;i<w_size*nr_class;i++) v += w[i]*w[i]; v = 0.5*v; for(i=0;i<l*nr_class;i++) { v += alpha[i]; if(fabs(alpha[i]) > 0) nSV++; } for(i=0;i<l;i++) v -= alpha[i*nr_class+(int)prob->y[i]]; info("Objective value = http://www.mamicode.com/%lf/n",v); info("nSV = %d\n",nSV); delete [] 
alpha; delete [] alpha_new; delete [] index; delete [] QD; delete [] d_ind; delete [] d_val; delete [] alpha_index; delete [] y_index; delete [] active_size_i;}// A coordinate descent algorithm for // L1-loss and L2-loss SVM dual problems//// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,// s.t. 0 <= \alpha_i <= upper_bound_i,// // where Qij = yi yj xi^T xj and// D is a diagonal matrix //// In L1-SVM case:// upper_bound_i = Cp if y_i = 1// upper_bound_i = Cn if y_i = -1// D_ii = 0// In L2-SVM case:// upper_bound_i = INF// D_ii = 1/(2*Cp) if y_i = 1// D_ii = 1/(2*Cn) if y_i = -1//// Given: // x, y, Cp, Cn// eps is the stopping tolerance//// solution will be put in w// // See Algorithm 3 of Hsieh et al., ICML 2008#undef GETI#define GETI(i) (y[i]+1)// To support weights for instances, use GETI(i) (i)static void solve_l2r_l1l2_svc( const problem *prob, double *w, double eps, double Cp, double Cn, int solver_type){ // info("%f %f\n",Cp,Cn); int l = prob->l; int w_size = prob->n; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; int max_iter = 1000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = INF; double PGmin_old = -INF; double PGmax_new, PGmin_new; // default solver_type: L2R_L2LOSS_SVC_DUAL double diag[3] = {0.5/Cn, 0, 0.5/Cp}; double upper_bound[3] = {INF, 0, INF}; if(solver_type == L2R_L1LOSS_SVC_DUAL) { diag[0] = 0; diag[2] = 0; upper_bound[0] = Cn; upper_bound[2] = Cp; } for(i=0; i<l; i++) { if(prob->y[i] > 0) { y[i] = +1; } else { y[i] = -1; } } // Initial alpha can be set here. Note that // 0 <= alpha[i] <= upper_bound[GETI(i)] for(i=0; i<l; i++) alpha[i] = 0; for(i=0; i<w_size; i++) w[i] = 0; for(i=0; i<l; i++) { QD[i] = diag[GETI(i)]; feature_node * const xi = prob->x[i]; QD[i] += sparse_operator::nrm2_sq(xi); sparse_operator::axpy(y[i]*alpha[i], xi, w); index[i] = i; } while (iter < max_iter) { PGmax_new = -INF; PGmin_new = INF; for (i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for (s=0; s<active_size; s++) { i = index[s]; const schar yi = y[i]; feature_node * const xi = prob->x[i]; G = yi*sparse_operator::dot(w, xi)-1; C = upper_bound[GETI(i)]; G += alpha[i]*diag[GETI(i)]; PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) PG = G; } else if (alpha[i] == C) { if (G < PGmin_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) PG = G; } else PG = G; PGmax_new = max(PGmax_new, PG); PGmin_new = min(PGmin_new, PG); if(fabs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; sparse_operator::axpy(d, xi, w); } } iter++; if(iter % 10 == 0) info("."); if(PGmax_new - PGmin_new <= eps) { if(active_size == l) break; else { active_size = l; info("*"); PGmax_old = INF; PGmin_old = -INF; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) PGmax_old = INF; if (PGmin_old >= 0) PGmin_old = -INF; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; for(i=0; i<l; i++) { v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2); if(alpha[i] > 0) ++nSV; } info("Objective value = 
http://www.mamicode.com/%lf/n",v/2); //info("nSV = %d\n",nSV); //info("Percentage of SVs:%f \n",(double)nSV*100/l); info(" %f \n",(double)nSV*100/l); delete [] QD; delete [] alpha; delete [] y; delete [] index;}static void solve_l2r_l1l2_svmop( const problem *prob, double *w, double eps, double Cp, double Cn, int solver_type){ // info("%f %f\n",Cp,Cn); int l = prob->l; int w_size = prob->n; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; int max_iter = 1000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double Gmax_old = INF; double Gmax_new, Gnorm1_new; double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration // default solver_type: L2R_L2LOSS_SVC_DUAL double diag[3] = {0.5/Cn, 0, 0.5/Cp}; double upper_bound[3] = {INF, 0, INF}; if(solver_type == L2R_L1LOSS_SVC_DUAL) { diag[0] = 0; diag[2] = 0; upper_bound[0] = Cn; upper_bound[2] = Cp; } for(i=0; i<l; i++) { if(prob->y[i] > 0) { y[i] = +1; } else { y[i] = -1; } } // Initial alpha can be set here. Note that // 0 <= alpha[i] <= upper_bound[GETI(i)] for(i=0; i<l; i++) alpha[i] = 0; for(i=0; i<w_size; i++) w[i] = 0; for(i=0; i<l; i++) { QD[i] = diag[GETI(i)]; feature_node * const xi = prob->x[i]; QD[i] += sparse_operator::nrm2_sq(xi); // sparse_operator::axpy(y[i]*alpha[i], xi, w); index[i] = i; } while (iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for (i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for (s=0; s<active_size; s++) { i = index[s]; const schar yi = y[i]; feature_node * const xi = prob->x[i]; double violation = 0; G = yi*sparse_operator::dot(w, xi)-1; C = upper_bound[GETI(i)]; G += alpha[i]*diag[GETI(i)]; if (alpha[i] == 0) { if (G > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) violation = -G; } else if (alpha[i] == C) { if (G < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) violation = G; } else violation = fabs(G); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; if(fabs(G) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; sparse_operator::axpy(d, xi, w); } } if(iter == 0) Gnorm1_init = Gnorm1_new; iter++; if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == l) break; else { active_size = l; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; for(i=0; i<l; i++) { v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2); if(alpha[i] > 0) ++nSV; } info("Objective value = http://www.mamicode.com/%lf/n",v/2); //info("nSV = %d\n",nSV); //info("Percentage of SVs:%f \n",(double)nSV*100/l); info(" %f \n",(double)nSV*100/l); delete [] QD; delete [] alpha; delete [] y; delete [] index;}/** ------------------modification begin---------------------------*/int calculate_y_k(double y, int k){ if(y>k+1){ return 1; }else{ return -1; }}double compute_nu_i_k(double y, int k, double power){ if(power==0) return 1; else return fabs(pow(fabs(double(y- k)), power) - pow(fabs(double(y- k-1)), power));}static void solve_l2r_svor(const problem *prob, const parameter *param, double *w, 
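/* A sketch of the dual this routine appears to solve (stated as an
   assumption, in the style of the solver comments above): with K ordered
   classes and thresholds b_1..b_{K-1}, let y^k_i = -1 if y_i <= label_k
   and +1 otherwise, and solve
     min_alpha 0.5 (||w||^2 + sum_k b_k^2) - sum_{i,k} alpha^k_i
     s.t.      0 <= alpha^k_i <= C * nu_{i,k},
   where w = sum_{i,k} alpha^k_i y^k_i x_i, b_k = sum_i alpha^k_i y^k_i, and
   nu_{i,k} = | |y_i-k|^w - |y_i-k-1|^w | comes from compute_nu_i_k (the -l
   exponent). Each coordinate update also shifts b_k, which is why
   QD = ||x_i||^2 + 1 below includes the threshold term. */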
double *b, int *label, int nr_class)//cost refers to power{ int i, j, k, s, iter = 0; int l = prob->l; // int nr_class = 0;//number of classes // int max_nr_class = 16;//max number of classes // int *label = new int[max_nr_class];//category of labels double *y = prob->y; // int this_label = 0; // for(i=0;i<l;i++) // { // this_label = (int)prob->y[i]; // for(j=0;j<nr_class;j++) // { // if(this_label == label[j]) // break; // } // y[i] = this_label; // if(j == nr_class) // { // label[nr_class] = this_label; // ++nr_class; // } // } double eps = param->eps; double C = param->C; double power = param->wl; // int w_size = prob->n; double d, G; double *QD = new double[l]; int max_iter = 1000; int *index = new int[(nr_class - 1)*l]; double *alpha = new double[(nr_class - 1)*l]; int active_size = (nr_class - 1)*l; // PG: projected gradient, for shrinking and stopping double Gmax_old = INF; double Gmax_new, Gnorm1_new; double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration/* //to be check // default solver_type: L2R_L2LOSS_SVC_DUAL double diag[3] = {0.5/Cn, 0, 0.5/Cp}; double upper_bound[3] = {INF, 0, INF}; if(solver_type == L2R_L1LOSS_SVC_DUAL) { diag[0] = 0; diag[2] = 0; upper_bound[0] = Cn; upper_bound[2] = Cp; }*/ // Initial alpha can be set here. Note that // 0 <= alpha[i] <= upper_bound[GETI(i)] memset(alpha,0,sizeof(double)*((nr_class - 1)*l)); for(i=0; i<l; i++) { feature_node * const xi = prob->x[i]; double xi_square = sparse_operator::nrm2_sq(xi); QD[i] = xi_square+1; // add for(k= 0; k<nr_class-1; k++) { //int y_i_k = calculate_y_k(y[i], k+1); //QD[k*l + i] = y_i_k*y_i_k*(xi_square+1); // QD[k*l + i] = xi_square+1;// sparse_operator::axpy(y_i_k*alpha[i*nr_class + k], xi, w); index[k*l + i] = k*l + i; } } int kk, ss;//kk(k‘),ss(s) in the paper // int i0,kk0,ss0,t; while (iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for (i=0; i<active_size; i++) { j = i+rand()%(active_size-i); swap(index[i], index[j]); } for (s=0; s<active_size; s++) { i = index[s]; kk = i/l;//k‘ ss = i%l;//s int y_s_ksign = (prob->y[ss] <= label[kk] ? 
-1 : 1); // y_{s,k'}
			feature_node * const xss = prob->x[ss];
			G = y_s_ksign*(sparse_operator::dot(w, xss)+b[kk])-1;
			//C = upper_bound[GETI(i)];
			//G += alpha[i]*diag[GETI(i)];
			double violation = 0;
			double upper_bound = C*compute_nu_i_k(y[ss], label[kk], power);
			if (alpha[i] == 0)
			{
				if (G > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					violation = -G;
			}
			else if (alpha[i] == upper_bound)
			{
				if (G < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					violation = G;
			}
			else
				violation = fabs(G);
			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
			if(fabs(G) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[ss], 0.0), upper_bound);
				d = (alpha[i] - alpha_old)*y_s_ksign;
				sparse_operator::axpy(d, xss, w);
				b[kk] += d;
			}
		}
		// printf("%d ",active_size);
		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");
		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == (nr_class - 1)*l)
				break;
			else
			{
				active_size = (nr_class - 1)*l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}
		Gmax_old = Gmax_new;
	}
	info("\noptimization finished, #iter = %d\n",iter);
	// for(i=0;i<nr_class-1;i++) info("%.6f\n",b[i]);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
	// calculate objective value
	//double v = 0;
	/*
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<(nr_class - 1)*l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	*/
	int nSV = 0;
	for(i=0; i<l; i++)
	{
		double alpha_i = 0;
		for(k= 0; k<nr_class-1; k++)
			alpha_i += alpha[k*l + i];
		if(alpha_i > 0)
			++nSV;
	}
	info("nSV = %d\n",nSV);
	delete [] QD;
	delete [] alpha;
	// y aliases prob->y here (it was not allocated with new), so it must not be deleted
	delete [] index;
}
static void solve_l2r_svor_full(const problem *prob, const parameter *param, double *w,
	double *b, int *label, int nr_class) // cost refers to power
{
	int i, j, k, s, iter = 0;
	int l = prob->l;
	int bigl = l * (nr_class-1);
	schar *y = new schar[bigl];
	double eps = param->eps;
	double C = param->C;
	// int w_size = prob->n;
	double d, G;
	double *QD = new double[bigl];
	int max_iter = 1000;
	int *index = new int[bigl];
	double *alpha = new double[bigl];
	int active_size = bigl;
	// PG: projected gradient, for shrinking and stopping
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	int idx = 0;
	for(i=0;i<l;i++)
	{
		feature_node * const xi = prob->x[i];
		double xi_square = sparse_operator::nrm2_sq(xi);
		for(k=0;k<nr_class -1;k++)
		{
			alpha[idx] = 0;
			QD[idx] = xi_square+1;
			y[idx] = (prob->y[i] <= label[k] ? -1 : 1);
			index[idx] = idx;
			idx++;
		}
	}
	// memset(alpha,0,sizeof(double)*(bigl));
	int kk, ss; // kk(k'), ss(s) in the paper
	while (iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		for (i=0; i<active_size; i++)
		{
			j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for (s=0; s<active_size; s++)
		{
			i = index[s];
			ss = i/(nr_class-1);
			kk = i%(nr_class-1);
			feature_node * const xss = prob->x[ss];
			G = y[i]*(sparse_operator::dot(w, xss)+b[kk])-1;
			//C = upper_bound[GETI(i)];
			//G += alpha[i]*diag[GETI(i)];
			double violation = 0;
			double upper_bound = C;
			if (alpha[i] == 0)
			{
				if (G > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					violation = -G;
			}
			else if (alpha[i] == upper_bound)
			{
				if (G < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					violation = G;
			}
			else
				violation = fabs(G);
			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
			if(fabs(G) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), upper_bound);
				d = y[i]*(alpha[i] - alpha_old);
				sparse_operator::axpy(d, xss, w);
				b[kk] += d;
			}
		}
		// printf("%d ",active_size);
		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");
		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == bigl)
				break;
			else
			{
				active_size = bigl;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}
		Gmax_old = Gmax_new;
	}
	info("\noptimization finished, #iter = %d\n",iter);
	// for(i=0;i<nr_class-1;i++) info("%.6f\n",b[i]);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
	// calculate objective value
	//double v = 0;
	/*
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<(nr_class - 1)*l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	*/
	int nSV = 0;
	for(i=0; i<l; i++)
	{
		double alpha_i = 0;
		for(k= 0; k<nr_class-1; k++)
			alpha_i += alpha[i*(nr_class-1) + k];
		if(alpha_i > 0)
			++nSV;
	}
	info("nSV = %d\n",nSV);
	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;
}
static void solve_l2r_npsvor_full(
	const problem *prob, double *w, const parameter *param,
	int k, int nk)
{
	int l = prob->l;
	double C1 = param->C1;
	double C2 = param->C2;
	double p = param->p;
	double eps = param->eps;
	int w_size = prob->n;
	int i,i0, s, iter = 0;
	double C, d, G;
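	/* Sketch of the per-class subproblem (an assumption read off the code
	   below): for the target class k, samples with y != k get an ordinary
	   hinge loss pushed to the side sign(y - k) with cost C1, while the nk
	   samples with y == k enter twice (hence l+nk dual variables, with
	   index0 mapping both copies back to the same row) through an
	   epsilon-insensitive band of half-width p and cost C2, as in support
	   vector regression. */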
	double *QD = new double[l];
	int max_iter = 1000;
	int *index = new int[l+nk];
	int *yk = new int[l+nk];
	double *alpha = new double[l+nk];
	double *y = prob->y;
	int active_size = l+nk;
	int *index0 = new int[l+nk];
	// PG: projected gradient, for shrinking and stopping
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	// C2 = nk/(l-nk)*C2;
	// Initial alpha can be set here. Note that
	// 0 <= alpha[i] <= upper_bound[GETI(i)]
	memset(alpha,0,sizeof(double)*(l+nk));
	memset(w,0,sizeof(double)*w_size);
	int j=0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		index[i] = i;
		index0[i] = i;
		if(y[i]<k)
			yk[i]=-1;
		else if(y[i]==k)
		{
			yk[i] = -1;
			yk[l+j] =1;
			index[l+j] = l+j;
			index0[l+j] = i;
			j++;
		}
		else yk[i] = 1;
	}
	while (iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;
		for (i=0; i<active_size; i++)
		{
			int j = i+rand()%(active_size-i);
			swap(index[i], index[j]);
		}
		for (s=0; s<active_size; s++)
		{
			i = index[s];
			i0 = index0[i];
			feature_node * const xi = prob->x[i0];
			G = yk[i]*sparse_operator::dot(w, xi);
			if(y[i0]!=k)
				{G += -1; C = C1;}
			else
				{G += p; C = C2;}
			double violation = 0;
			if (alpha[i] == 0)
			{
				if (G > Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G < 0)
					violation = -G;
			}
			else if (alpha[i] == C)
			{
				if (G < -Gmax_old)
				{
					active_size--;
					swap(index[s], index[active_size]);
					s--;
					continue;
				}
				else if (G > 0)
					violation = G;
			}
			else
				violation = fabs(G);
			Gmax_new = max(Gmax_new, violation);
			Gnorm1_new += violation;
			if(fabs(G) > 1.0e-12)
			{
				double alpha_old = alpha[i];
				alpha[i] = min(max(alpha[i] - G/QD[i0], 0.0), C);
				d = (alpha[i] - alpha_old)*yk[i];
				sparse_operator::axpy(d, xi, w);
			}
		}
		// printf("%d\n",nk);
		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		// printf("%.3f ",max(PGmax_new,-PGmin_new));
		if(iter % 10 == 0)
			info(".");
		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}
		Gmax_old = Gmax_new;
	}
	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	j=0;
	for(i=0; i<l; i++)
	{
		if(y[i]== k)
		{
			v += p*(alpha[i]+alpha[l+j]);
			if((alpha[i]-alpha[l+j]) != 0)
				nSV++;
			j++;
		}
		else
		{
			v += - alpha[i];
			if(alpha[i] != 0)
				nSV++;
		}
	}
	info("Objective value = %lf\n",v/2);
	info(" %f \n",(double)nSV*100/l);
	delete [] QD;
	delete [] alpha;
	delete [] index;
	delete [] yk;
	delete [] index0;
}
int calculate_yki(double y, int k)
{
	if(y>k){
		return 1;
	}else{
		return -1;
	}
}
double npsvor_obj_value(const problem *prob,double *w,double *alpha, double p, int k)
{
	// calculate objective value
	double v = 0;
	double *y = prob->y;
	int i, l = prob->l,w_size = prob->n;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		if(y[i]== k)
			v += p*fabs(alpha[i]);
		else
			v += - alpha[i];
	}
	return v;
}
static void solve_l2r_npsvor(
	const problem *prob, double *w, const parameter *param,
	int k)
{
	int l = prob->l;
	double C1 = param->C1;
	double C2 = param->C2;
	double p = param->p;
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];
	double *obj_value = new double[max_iter];
	double *T = new double[max_iter];
	double *alpha = new double[l];
	double *QD = new double[l];
	double *y = prob->y;
	double d, G;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	clock_t start, stop;
	start = clock();
	memset(alpha,0,sizeof(double)*l);
	memset(w,0,sizeof(double)*w_size);
	// printf("%.3f %.3f\n",C1,C2);
	// int nk=0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		// sparse_operator::axpy(beta[i], xi, w);
		index[i] = i;
		//
if(y[i]==k) // nk++; } // p = (double)nk/l*p; while(iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for(i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0; s<active_size; s++) { i = index[s]; int yki = calculate_yki(y[i], k);//ysk‘ feature_node * const xi = prob->x[i]; double violation = 0; if(y[i]!= k) { G = yki*sparse_operator::dot(w, xi) -1; if (alpha[i] == 0) { if (G > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) violation = -G; } else if (alpha[i] == C2) { if (G < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) violation = G; } else violation = fabs(G); // PGmax_new = max(PGmax_new, PG); // PGmin_new = min(PGmin_new, PG); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C2); d = (alpha[i] - alpha_old)*yki; sparse_operator::axpy(d, xi, w); } } else { G = yki*sparse_operator::dot(w, xi); double Gp = G+p; double Gn = G-p; if(alpha[i] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old && Gn<-Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] >= C1) { if(Gp > 0) violation = Gp; else if(Gp < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] <= -C1) { if(Gn < 0) violation = -Gn; else if(Gn > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if(Gp < QD[i]*alpha[i]) d = -Gp/QD[i]; else if(Gn > QD[i]*alpha[i]) d = -Gn/QD[i]; else d = -alpha[i]; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i]+d, -C1), C1); d = yki*(alpha[i]-alpha_old); sparse_operator::axpy(d, xi, w); } } } if(iter == 0) Gnorm1_init = Gnorm1_new; obj_value[iter] = npsvor_obj_value(prob,w,alpha,p,k); stop=clock(); T[iter] = (double)(stop-start)/CLOCKS_PER_SEC; iter++; // printf("%.3f %.3f %.3f ",Gmax_old,Gnorm1_new,eps*Gnorm1_init); if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == l) break; else { active_size = l; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; v = 0.5*v; for(i=0; i<l; i++) { if(y[i]== k) v += p*fabs(alpha[i]); else v += - alpha[i]; if(alpha[i] != 0) nSV++; } // info("Objective value = http://www.mamicode.com/%lf/n", v); // info("nSV = %d\n",nSV); // for(i=0;i<iter;i++) // printf("%.3f %.3f\n",(obj_value[i]-obj_value[iter-1])/fabs(obj_value[iter-1]),T[i]); // printf("\n"); delete [] alpha; delete [] QD; delete [] index;}static void solve_l2r_npsvor_two( const problem *prob, double *w, const parameter *param, double *QD){ int l = prob->l; double C1 = param->C1; double C2 = param->C2; double p = param->p; int w_size = prob->n; double eps = param->eps; int i, s, iter = 0; int max_iter = 1000; int active_size = l; int *index = new int[l]; // double *obj_value = http://www.mamicode.com/new double[max_iter];"%.3f %.3f\n",C1,C2); // int nk=0; for(i=0; i<l; i++) index[i] = i; while(iter < 
max_iter) { Gmax_new = 0; Gnorm1_new = 0; for(i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0; s<active_size; s++) { i = index[s]; feature_node * const xi = prob->x[i]; double violation = 0; if(y[i]==1) { G = y[i]*sparse_operator::dot(w, xi) -1; if (alpha[i] == 0) { if (G > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) violation = -G; } else if (alpha[i] == C2) { if (G < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) violation = G; } else violation = fabs(G); // PGmax_new = max(PGmax_new, PG); // PGmin_new = min(PGmin_new, PG); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C2); d = (alpha[i] - alpha_old)*y[i]; sparse_operator::axpy(d, xi, w); } } else { G = y[i]*sparse_operator::dot(w, xi); double Gp = G+p; double Gn = G-p; if(alpha[i] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old && Gn<-Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] >= C1) { if(Gp > 0) violation = Gp; else if(Gp < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] <= -C1) { if(Gn < 0) violation = -Gn; else if(Gn > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if(Gp < QD[i]*alpha[i]) d = -Gp/QD[i]; else if(Gn > QD[i]*alpha[i]) d = -Gn/QD[i]; else d = -alpha[i]; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i]+d, -C1), C1); d = y[i]*(alpha[i]-alpha_old); sparse_operator::axpy(d, xi, w); } } } if(iter == 0) Gnorm1_init = Gnorm1_new; // obj_value[iter] = npsvor_obj_value(prob,w,alpha,p,k); stop=clock(); T[iter] = (double)(stop-start)/CLOCKS_PER_SEC; iter++; // printf("%.3f %.3f %.3f ",Gmax_old,Gnorm1_new,eps*Gnorm1_init); if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == l) break; else { active_size = l; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; v = 0.5*v; for(i=0; i<l; i++) { if(y[i]== -1) v += p*fabs(alpha[i]); else v += - alpha[i]; if(alpha[i] != 0) nSV++; } // info("Objective value = %lf\n", v); // info("nSV = %d\n",nSV); // for(i=0;i<iter;i++) // printf("%.3f %.3f\n",(obj_value[i]-obj_value[iter-1])/fabs(obj_value[iter-1]),T[i]); // printf("\n"); delete [] alpha; delete [] index;}static void sub_ramp_npsvor( const problem *prob, double *w, const parameter *param, double *alpha, double *QD, int k){ int l = prob->l; double C1 = param->C1; double C2 = param->C2; double p = param->p; int w_size = prob->n; double eps = param->eps; int i, s, iter = 0; int max_iter = 1000; int active_size = l; int *index = new int[l]; double *obj_value = new double[max_iter]; double d, G; double Gmax_old = INF; double Gmax_new, Gnorm1_new; double Gnorm1_init = -1.0; double *y = prob->y;
// info("%.3f %.3f\n",C1,C2); // memset(w,0,sizeof(double)*w_size); // memset(alpha,0,sizeof(double)*l);
for(i=0; i<l; i++) {
// double yi = (double)calculate_yki(y[i], k);
index[i] = i; // sparse_operator::axpy(yi*(alpha[i]+delta[i]), prob->x[i], w);
} while(iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for(i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0; s<active_size; s++) { i = index[s]; int yki = calculate_yki(y[i], k); // y_i^k feature_node * const xi = prob->x[i]; double violation = 0; if(y[i]!= k) { G = yki*sparse_operator::dot(w, xi) -1; if (alpha[i] == 0) { if (G > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) violation = -G; } else if (alpha[i] == C2) { if (G < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) violation = G; } else violation = fabs(G); // PGmax_new = max(PGmax_new, PG); // PGmin_new = min(PGmin_new, PG); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C2); d = (alpha[i] - alpha_old)*yki; sparse_operator::axpy(d, xi, w); } } else { G = yki*sparse_operator::dot(w, xi); double Gp = G+p; double Gn = G-p; if(alpha[i] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old && Gn<-Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] >= C1) { if(Gp > 0) violation = Gp; else if(Gp < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] <= -C1) { if(Gn < 0) violation = -Gn; else if(Gn > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(alpha[i] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if(Gp < QD[i]*alpha[i]) d = -Gp/QD[i]; else if(Gn > QD[i]*alpha[i]) d = -Gn/QD[i]; else d = -alpha[i]; if(fabs(violation) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i]+d, -C1), C1); d = yki*(alpha[i]-alpha_old); sparse_operator::axpy(d, xi, w); } } } if(iter == 0) Gnorm1_init = Gnorm1_new; obj_value[iter] = npsvor_obj_value(prob,w,alpha,p,k); stop=clock(); T[iter] = (double)(stop-start)/CLOCKS_PER_SEC; iter++; // printf("%.3f %.3f %.3f ",Gmax_old,Gnorm1_new,eps*Gnorm1_init); if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == l) break; else { active_size = l; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; v = 0.5*v; for(i=0; i<l; i++) { if(y[i]== k) v += p*fabs(alpha[i]); else v += - alpha[i]; if(alpha[i] != 0) nSV++; } // info("Objective value = %lf\n", v); // info("nSV = %d\n",nSV); // for(i=0;i<iter;i++) // printf("%.3f %.3f\n",(obj_value[i]-obj_value[iter-1])/fabs(obj_value[iter-1]),T[i]); // printf("\n"); // delete [] alpha; // delete [] QD; delete [] obj_value; delete [] index;}static void ramp_npsvor( const problem *prob, double *w, const parameter *param, int k){ int l = prob->l; double C1 = param->C1; double C2 = param->C2; int w_size = prob->n; double *alpha = new double[l]; double *delta = new double[l]; // double *w0= new double[w_size]; memset(alpha,0,sizeof(double)*l); memset(delta,0,sizeof(double)*l); memset(w,0,sizeof(double)*w_size); int iter = 0, maxiter = 4; double *y = prob->y; double HB; double hinge_s = -1, 
Ins_t = 2; double *QD = new double[l]; int i; for(i=0; i<l; i++) QD[i] = sparse_operator::nrm2_sq(prob->x[i]); while(iter<maxiter) { for(i=0; i<l; i++) { double yi =(double) calculate_yki(y[i], k); HB = sparse_operator::dot(w, prob->x[i]); if(yi*HB<hinge_s && y[i]!= k) delta[i] = -C2; else if(HB> Ins_t && y[i]== k) delta[i] = -C1; else if(HB< -Ins_t && y[i]== k) delta[i] = C1; else delta[i] = 0; // info("%.1f\n ", delta[i]); } memset(w,0,sizeof(double)*w_size); for(i=0; i<l; i++) { double yi =(double) calculate_yki(y[i], k); sparse_operator::axpy(yi*(alpha[i]+delta[i]), prob->x[i], w); } sub_ramp_npsvor(prob, w, param, alpha, QD, k); // if(r_norm<eps_pri) // break; iter++; } delete [] alpha; delete [] delta; delete [] QD;}double Update_W(const parameter *param, double *W, double *U,double *Z, int nr_class, int n){ int i,j; double coef,s0=0,W_old; double w_hat; coef = n*param->rho/((1/param->C)+ n*param->rho); for(i=0;i<n;i++) { W_old = W[i]; w_hat = 0; for(j=0;j<nr_class-1;j++) w_hat = w_hat + Z[j*n+i]+U[j*n+i]; W[i] = coef*w_hat/(nr_class-1); s0 =s0+ (W[i] - W_old)*(W[i] - W_old); } // for(i=0;i<10;i++) info("%.3f ",W[i]); return param->rho*sqrt(s0);}static void Update_U(double *U, double *Z,double *W, int nr_class, int n){ int i,j; for(j=0;j<nr_class-1;j++) { for(i=0;i<n;i++) { U[j*n+i] = U[j*n+i] + Z[j*n+i]-W[i]; } }}// //b is the returned value.// double subproblem(const problem *prob, const parameter *param, double *w,//vector y is the processed labels// double *z, double *u, double *alpha, double *nu, int j)static void subproblem(const problem *prob, const parameter *param, double *W, double *bias, double *Z, double *U, double *Alpha, int *Index, int *Active_size, int k){ // int l = prob->l; int w_size = prob->n; int i, s, iter = 0; double d, G;//C, double *QD = Malloc(double, prob->l); int max_iter = 1000; int *index = Malloc(int, prob->l); //double *alpha = new double[l]; //schar *y = new schar[l]; // int active_size = prob->l; int active_size = Active_size[k]; double power = param->wl; double *y = Malloc(double, prob->l); double *nu = Malloc(double, prob->l); double b=bias[k]; double *z = Malloc(double, prob->n); double *alpha = Malloc(double, prob->l); // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = INF; double PGmin_old = -INF; double PGmax_new, PGmin_new;/* // default solver_type: L2R_L2LOSS_SVC_DUAL double diag[3] = {0.5/Cn, 0, 0.5/Cp}; double upper_bound[3] = {INF, 0, INF}; if(solver_type == L2R_L1LOSS_SVC_DUAL) { diag[0] = 0; diag[2] = 0; upper_bound[0] = Cn; upper_bound[2] = Cp; } for(i=0; i<l; i++) { if(prob->y[i] > 0) { y[i] = +1; } else { y[i] = -1; } }*/ // Initial alpha can be set here. 
Note that // 0 <= alpha[i] <= upper_bound[GETI(i)] //for(i=0; i<l; i++) // alpha[i] = 0; // int pos=0, neg=0; for(i=0;i<prob->l;i++) { y[i] = calculate_y_k(prob->y[i], k); nu[i] = compute_nu_i_k(prob->y[i], k, power); // if(y[i]==1) // pos = pos+1; // else // neg = neg+1; } // info("pos%d neg%d",pos,neg); for(i=0; i<prob->n; i++){ z[i] = W[i] - U[k*w_size+i]; } for(i=0; i<prob->l; i++) { //QD[i] = diag[GETI(i)]; alpha[i] = Alpha[k*prob->l+i]; feature_node * const xi = prob->x[i]; //QD[i] += y[i]*y[i]*(sparse_operator::nrm2_sq(xi)/rho+1); QD[i] = sparse_operator::nrm2_sq(xi)/param->rho+1; sparse_operator::axpy(y[i]*alpha[i]/param->rho, xi, z); b += y[i]*alpha[i]; // index[i] = i; index[i] = Index[k*prob->l+i]; } while (iter < max_iter) { PGmax_new = -INF; PGmin_new = INF; for (i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for (s=0; s<active_size; s++) { i = index[s]; const double yi = y[i]; feature_node * const xi = prob->x[i]; G = yi*(sparse_operator::dot(z, xi) + b)-1; //C = upper_bound[GETI(i)]; //G += alpha[i]*diag[GETI(i)]; PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) PG = G; } else if (alpha[i] == param->C*nu[i]) { if (G < PGmin_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) PG = G; } else PG = G; PGmax_new = max(PGmax_new, PG); PGmin_new = min(PGmin_new, PG); if(fabs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), param->C*nu[i]); d = (alpha[i] - alpha_old)*yi; sparse_operator::axpy(d/param->rho, xi, z); b += d; } } iter++; if(iter % 10 == 0) info("."); if(PGmax_new - PGmin_new <= param->eps) { if(active_size == prob->l) break; else { active_size = prob->l; info("*"); PGmax_old = INF; PGmin_old = -INF; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) PGmax_old = INF; if (PGmin_old >= 0) PGmin_old = -INF; } bias[k] = b; for(i=0;i<prob->l;i++) {Alpha[k*prob->l+i] = alpha[i]; Index[k*prob->l+i] = index[i]; } for(i=0;i<prob->n;i++) Z[k*prob->n+i] = z[i]; Active_size[k]=active_size; info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value/* double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; for(i=0; i<l; i++) { v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2); if(alpha[i] > 0) ++nSV; } info("Objective value = %lf\n",v/2); info("nSV = %d\n",nSV);*/ delete [] alpha; delete [] z; delete [] QD; delete [] y; delete [] nu; //delete [] alpha; //delete [] y; delete [] index;}double norm2_diff(double *Z, double *W, int nr_class, int n)// 2-norm of Z minus column-replicated W{ int i,j; double ret = 0; for(j=0;j<nr_class-1;j++) { for(i=0;i<n;i++) ret = ret+ (Z[j*n+i]-W[i])*(Z[j*n+i]-W[i]); } return (sqrt(ret));}double norm2(double *Z, int n)// 2-norm{ int j; double ret = 0; for(j=0;j<n;j++) { ret = ret + Z[j]*Z[j]; } return (sqrt(ret));}static void solve_l2r_svor_admm(const problem *prob,const parameter *param, double *W, double *b, int nr_class){ int i,j,iter=0; int max_iter = 50; // int max_nr_class = 16;//max number of classes double *Z = Malloc(double, (nr_class-1)*prob->n); double *U = Malloc(double, (nr_class-1)*prob->n); double *alpha = Malloc(double, (nr_class-1)*prob->l); double s_norm,r_norm; double ABSTOL=10e-5, RELTOL=10e-5; double eps_pri,eps_dual; int *Index=Malloc(int, 
(nr_class-1)*prob->l); int *Active_size = new int[nr_class-1]; memset(Z,0,sizeof(double)*((nr_class - 1)*prob->n)); memset(U,0,sizeof(double)*((nr_class - 1)*prob->n)); memset(W,0,sizeof(double)*(prob->n)); // memset(alpha,0,sizeof(double)*((nr_class - 1)*prob->l)); // for(i=0;i<prob->n;i++) // { // for(j=0;j<nr_class-1;j++) // { // Z[j*prob->n+i] = 0; // U[j*prob->n+i] = 0; // } // W[i] = 0; // } for(i=0;i<(nr_class-1);i++) for(j=0;j<prob->l;j++) { alpha[i*prob->l+j] = 0; Index[i*prob->l+j] = j; } while(iter < max_iter) {//ADMM for(j = 0; j < nr_class - 1; j++) { if(iter==0) Active_size[j]=prob->l; subproblem(prob, param, W, b, Z, U, alpha, Index, Active_size, j); } // for(i=0;i<10;i++) info("%.3f ",W[i]); s_norm = Update_W(param, W, U,Z, nr_class, prob->n); //Average nr_class vectors Z to W Update_U(U, Z, W, nr_class, prob->n); r_norm = norm2_diff(Z, W, nr_class, prob->n); eps_pri = sqrt(prob->n)*ABSTOL + RELTOL*max(norm2(W,prob->n)*(nr_class-1), norm2(Z, prob->n*(nr_class-1))); eps_dual = sqrt(prob->n)*ABSTOL + RELTOL*param->rho*norm2(U,prob->n*(nr_class-1)); if(r_norm<eps_pri && s_norm<eps_dual) break; iter++; } delete [] Z; delete [] U; delete [] alpha;}/** ------------------modification end---------------------------*/// A coordinate descent algorithm for // L1-loss and L2-loss epsilon-SVR dual problem//// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,// s.t. -upper_bound_i <= \beta_i <= upper_bound_i,// // where Qij = xi^T xj and// D is a diagonal matrix //// In L1-SVM case:// upper_bound_i = C// lambda_i = 0// In L2-SVM case:// upper_bound_i = INF// lambda_i = 1/(2*C)//// Given: // x, y, p, C// eps is the stopping tolerance//// solution will be put in w//// See Algorithm 4 of Ho and Lin, 2012 #undef GETI#define GETI(i) (0)// To support weights for instances, use GETI(i) (i)static void solve_l2r_l1l2_svr( const problem *prob, double *w, const parameter *param, int solver_type){ int l = prob->l; double C = param->C; double p = param->p; int w_size = prob->n; double eps = param->eps; int i, s, iter = 0; int max_iter = 1000; int active_size = l; int *index = new int[l]; double d, G, H; double Gmax_old = INF; double Gmax_new, Gnorm1_new; double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration double *beta = new double[l]; double *QD = new double[l]; double *y = prob->y; // L2R_L2LOSS_SVR_DUAL double lambda[1], upper_bound[1]; lambda[0] = 0.5/C; upper_bound[0] = INF; if(solver_type == L2R_L1LOSS_SVR_DUAL) { lambda[0] = 0; upper_bound[0] = C; } // Initial beta can be set here. 
Note that // -upper_bound <= beta[i] <= upper_bound for(i=0; i<l; i++) beta[i] = 0; for(i=0; i<w_size; i++) w[i] = 0; for(i=0; i<l; i++) { feature_node * const xi = prob->x[i]; QD[i] = sparse_operator::nrm2_sq(xi); sparse_operator::axpy(beta[i], xi, w); index[i] = i; } while(iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for(i=0; i<active_size; i++) { int j = i+rand()%(active_size-i); swap(index[i], index[j]); } for(s=0; s<active_size; s++) { i = index[s]; G = -y[i] + lambda[GETI(i)]*beta[i]; H = QD[i] + lambda[GETI(i)]; feature_node * const xi = prob->x[i]; G += sparse_operator::dot(w, xi); double Gp = G+p; double Gn = G-p; double violation = 0; if(beta[i] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old && Gn<-Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(beta[i] >= upper_bound[GETI(i)]) { if(Gp > 0) violation = Gp; else if(Gp < -Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(beta[i] <= -upper_bound[GETI(i)]) { if(Gn < 0) violation = -Gn; else if(Gn > Gmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(beta[i] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if(Gp < H*beta[i]) d = -Gp/H; else if(Gn > H*beta[i]) d = -Gn/H; else d = -beta[i]; if(fabs(d) < 1.0e-12) continue; double beta_old = beta[i]; beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]); d = beta[i]-beta_old; if(d != 0) sparse_operator::axpy(d, xi, w); } if(iter == 0) Gnorm1_init = Gnorm1_new; iter++; // printf("%.3f %.3f %.3f ",Gmax_old,Gnorm1_new,eps*Gnorm1_init); if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == l) break; else { active_size = l; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) v += w[i]*w[i]; v = 0.5*v; for(i=0; i<l; i++) { v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i]; if(beta[i] != 0) nSV++; } info("Objective value = %lf\n", v); info("nSV = %d\n",nSV); delete [] beta; delete [] QD; delete [] index;}// A coordinate descent algorithm for // the dual of L2-regularized logistic regression problems//// min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),// s.t. 
0 <= \alpha_i <= upper_bound_i,// // where Qij = yi yj xi^T xj and // upper_bound_i = Cp if y_i = 1// upper_bound_i = Cn if y_i = -1//// Given: // x, y, Cp, Cn// eps is the stopping tolerance//// solution will be put in w//// See Algorithm 5 of Yu et al., MLJ 2010#undef GETI#define GETI(i) (y[i]+1)// To support weights for instances, use GETI(i) (i)void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn){ int l = prob->l; int w_size = prob->n; int i, s, iter = 0; double *xTx = new double[l]; int max_iter = 1000; int *index = new int[l]; double *alpha = new double[2*l]; // store alpha and C - alpha schar *y = new schar[l]; int max_inner_iter = 100; // for inner Newton double innereps = 1e-2; double innereps_min = min(1e-8, eps); double upper_bound[3] = {Cn, 0, Cp}; for(i=0; i<l; i++) { if(prob->y[i] > 0) { y[i] = +1; } else { y[i] = -1; } } // Initial alpha can be set here. Note that // 0 < alpha[i] < upper_bound[GETI(i)] // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] for(i=0; i<l; i++) { alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8); alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i]; } for(i=0; i<w_size; i++) w[i] = 0; for(i=0; i<l; i++) { feature_node * const xi = prob->x[i]; xTx[i] = sparse_operator::nrm2_sq(xi); sparse_operator::axpy(y[i]*alpha[2*i], xi, w); index[i] = i; } while (iter < max_iter) { for (i=0; i<l; i++) { int j = i+rand()%(l-i); swap(index[i], index[j]); } int newton_iter = 0; double Gmax = 0; for (s=0; s<l; s++) { i = index[s]; const schar yi = y[i]; double C = upper_bound[GETI(i)]; double ywTx = 0, xisq = xTx[i]; feature_node * const xi = prob->x[i]; ywTx = yi*sparse_operator::dot(w, xi); double a = xisq, b = ywTx; // Decide to minimize g_1(z) or g_2(z) int ind1 = 2*i, ind2 = 2*i+1, sign = 1; if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) { ind1 = 2*i+1; ind2 = 2*i; sign = -1; } // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) double alpha_old = alpha[ind1]; double z = alpha_old; if(C - z < 0.5 * C) z = 0.1*z; double gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); Gmax = max(Gmax, fabs(gp)); // Newton method on the sub-problem const double eta = 0.1; // xi in the paper int inner_iter = 0; while (inner_iter <= max_inner_iter) { if(fabs(gp) < innereps) break; double gpp = a + C/(C-z)/z; double tmpz = z - gp/gpp; if(tmpz <= 0) z *= eta; else // tmpz in (0, C) z = tmpz; gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); newton_iter++; inner_iter++; } if(inner_iter > 0) // update w { alpha[ind1] = z; alpha[ind2] = C-z; sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w); } } iter++; if(iter % 10 == 0) info("."); if(Gmax < eps) break; if(newton_iter <= l/10) innereps = max(innereps_min, 0.1*innereps); } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; for(i=0; i<w_size; i++) v += w[i] * w[i]; v *= 0.5; for(i=0; i<l; i++) v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]); info("Objective value = %lf\n", v); delete [] xTx; delete [] alpha; delete [] y; delete [] index;}// A coordinate descent algorithm for // L1-regularized L2-loss support vector classification//// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,//// Given: // x, y, Cp, Cn// eps is the stopping tolerance//// solution will be put in w//// See Yuan et al. 
(2010) and appendix of LIBLINEAR paper, Fan et al. (2008)#undef GETI#define GETI(i) (y[i]+1)// To support weights for instances, use GETI(i) (i)static void solve_l1r_l2_svc( problem *prob_col, double *w, double eps, double Cp, double Cn){ int l = prob_col->l; int w_size = prob_col->n; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; double sigma = 0.01; double d, G_loss, G, H; double Gmax_old = INF; double Gmax_new, Gnorm1_new; double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration double d_old, d_diff; double loss_old, loss_new; double appxcond, cond; int *index = new int[w_size]; schar *y = new schar[l]; double *b = new double[l]; // b = 1-ywTx double *xj_sq = new double[w_size]; feature_node *x; double C[3] = {Cn,0,Cp}; // Initial w can be set here. for(j=0; j<w_size; j++) w[j] = 0; for(j=0; j<l; j++) { b[j] = 1; if(prob_col->y[j] > 0) y[j] = 1; else y[j] = -1; } for(j=0; j<w_size; j++) { index[j] = j; xj_sq[j] = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; x->value *= y[ind]; // x->value stores yi*xij double val = x->value; b[ind] -= w[j]*val; xj_sq[j] += C[GETI(ind)]*val*val; x++; } } while(iter < max_iter) { Gmax_new = 0; Gnorm1_new = 0; for(j=0; j<active_size; j++) { int i = j+rand()%(active_size-j); swap(index[i], index[j]); } for(s=0; s<active_size; s++) { j = index[s]; G_loss = 0; H = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; if(b[ind] > 0) { double val = x->value; double tmp = C[GETI(ind)]*val; G_loss -= tmp*b[ind]; H += tmp*val; } x++; } G_loss *= 2; G = G_loss; H *= 2; H = max(H, 1e-12); double Gp = G+1; double Gn = G-1; double violation = 0; if(w[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(w[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d if(Gp < H*w[j]) d = -Gp/H; else if(Gn > H*w[j]) d = -Gn/H; else d = -w[j]; if(fabs(d) < 1.0e-12) continue; double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; d_old = 0; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; appxcond = xj_sq[j]*d*d + G_loss*d + cond; if(appxcond <= 0) { x = prob_col->x[j]; sparse_operator::axpy(d_diff, x, b); break; } if(num_linesearch == 0) { loss_old = 0; loss_new = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; if(b[ind] > 0) loss_old += C[GETI(ind)]*b[ind]*b[ind]; double b_new = b[ind] + d_diff*x->value; b[ind] = b_new; if(b_new > 0) loss_new += C[GETI(ind)]*b_new*b_new; x++; } } else { loss_new = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; double b_new = b[ind] + d_diff*x->value; b[ind] = b_new; if(b_new > 0) loss_new += C[GETI(ind)]*b_new*b_new; x++; } } cond = cond + loss_new - loss_old; if(cond <= 0) break; else { d_old = d; d *= 0.5; delta *= 0.5; } } w[j] += d; // recompute b[] if line search takes too many steps if(num_linesearch >= max_num_linesearch) { info("#"); for(int i=0; i<l; i++) b[i] = 1; for(int i=0; i<w_size; i++) { if(w[i]==0) continue; x = prob_col->x[i]; sparse_operator::axpy(-w[i], x, b); } } } if(iter == 0) Gnorm1_init = Gnorm1_new; iter++; if(iter % 10 == 0) info("."); if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == w_size) break; else { active_size = w_size; 
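// Reading note on the shrinking convention used here (it follows the upstream
// LIBLINEAR behavior visible in this loop): "." is printed every 10 outer
// iterations; when the tolerance holds only on the shrunken working set, the
// active set is restored to all w_size features, "*" is printed, Gmax_old is
// reset to INF, and iteration continues until the test also holds on the full set.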
info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nnz = 0; for(j=0; j<w_size; j++) { x = prob_col->x[j]; while(x->index != -1) { x->value *= prob_col->y[x->index-1]; // restore x->value x++; } if(w[j] != 0) { v += fabs(w[j]); nnz++; } } for(j=0; j<l; j++) if(b[j] > 0) v += C[GETI(j)]*b[j]*b[j]; info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); delete [] index; delete [] y; delete [] b; delete [] xj_sq;}// A coordinate descent algorithm for // L1-regularized logistic regression problems//// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),//// Given: // x, y, Cp, Cn// eps is the stopping tolerance//// solution will be put in w//// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)#undef GETI#define GETI(i) (y[i]+1)// To support weights for instances, use GETI(i) (i)static void solve_l1r_lr( const problem *prob_col, double *w, double eps, double Cp, double Cn){ int l = prob_col->l; int w_size = prob_col->n; int j, s, newton_iter=0, iter=0; int max_newton_iter = 100; int max_iter = 1000; int max_num_linesearch = 20; int active_size; int QP_active_size; double nu = 1e-12; double inner_eps = 1; double sigma = 0.01; double w_norm, w_norm_new; double z, G, H; double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration double Gmax_old = INF; double Gmax_new, Gnorm1_new; double QP_Gmax_old = INF; double QP_Gmax_new, QP_Gnorm1_new; double delta, negsum_xTd, cond; int *index = new int[w_size]; schar *y = new schar[l]; double *Hdiag = new double[w_size]; double *Grad = new double[w_size]; double *wpd = new double[w_size]; double *xjneg_sum = new double[w_size]; double *xTd = new double[l]; double *exp_wTx = new double[l]; double *exp_wTx_new = new double[l]; double *tau = new double[l]; double *D = new double[l]; feature_node *x; double C[3] = {Cn,0,Cp}; // Initial w can be set here. 
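// Reading note restating the coordinate step used in the QP loop below: for a
// coordinate j with model gradient G and Hessian estimate H, minimizing
// |wpd_j + z| + G*z + 0.5*H*z^2 over z has the closed form
//     z = -(G+1)/H   if G+1 < H*wpd_j,
//     z = -(G-1)/H   if G-1 > H*wpd_j,
//     z = -wpd_j     otherwise (the coordinate is set to zero),
// which is exactly the Gp/Gn three-way branch implemented in the inner loop.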
for(j=0; j<w_size; j++) w[j] = 0; for(j=0; j<l; j++) { if(prob_col->y[j] > 0) y[j] = 1; else y[j] = -1; exp_wTx[j] = 0; } w_norm = 0; for(j=0; j<w_size; j++) { w_norm += fabs(w[j]); wpd[j] = w[j]; index[j] = j; xjneg_sum[j] = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; double val = x->value; exp_wTx[ind] += w[j]*val; if(y[ind] == -1) xjneg_sum[j] += C[GETI(ind)]*val; x++; } } for(j=0; j<l; j++) { exp_wTx[j] = exp(exp_wTx[j]); double tau_tmp = 1/(1+exp_wTx[j]); tau[j] = C[GETI(j)]*tau_tmp; D[j] = C[GETI(j)]*exp_wTx[j]*tau_tmp*tau_tmp; } while(newton_iter < max_newton_iter) { Gmax_new = 0; Gnorm1_new = 0; active_size = w_size; for(s=0; s<active_size; s++) { j = index[s]; Hdiag[j] = nu; Grad[j] = 0; double tmp = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; Hdiag[j] += x->value*x->value*D[ind]; tmp += x->value*tau[ind]; x++; } Grad[j] = -tmp + xjneg_sum[j]; double Gp = Grad[j]+1; double Gn = Grad[j]-1; double violation = 0; if(w[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; //outer-level shrinking else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(w[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; } if(newton_iter == 0) Gnorm1_init = Gnorm1_new; if(Gnorm1_new <= eps*Gnorm1_init) break; iter = 0; QP_Gmax_old = INF; QP_active_size = active_size; for(int i=0; i<l; i++) xTd[i] = 0; // optimize QP over wpd while(iter < max_iter) { QP_Gmax_new = 0; QP_Gnorm1_new = 0; for(j=0; j<QP_active_size; j++) { int i = j+rand()%(QP_active_size-j); swap(index[i], index[j]); } for(s=0; s<QP_active_size; s++) { j = index[s]; H = Hdiag[j]; x = prob_col->x[j]; G = Grad[j] + (wpd[j]-w[j])*nu; while(x->index != -1) { int ind = x->index-1; G += x->value*D[ind]*xTd[ind]; x++; } double Gp = G+1; double Gn = G-1; double violation = 0; if(wpd[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; //inner-level shrinking else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) { QP_active_size--; swap(index[s], index[QP_active_size]); s--; continue; } } else if(wpd[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); QP_Gmax_new = max(QP_Gmax_new, violation); QP_Gnorm1_new += violation; // obtain solution of one-variable problem if(Gp < H*wpd[j]) z = -Gp/H; else if(Gn > H*wpd[j]) z = -Gn/H; else z = -wpd[j]; if(fabs(z) < 1.0e-12) continue; z = min(max(z,-10.0),10.0); wpd[j] += z; x = prob_col->x[j]; sparse_operator::axpy(z, x, xTd); } iter++; if(QP_Gnorm1_new <= inner_eps*Gnorm1_init) { //inner stopping if(QP_active_size == active_size) break; //active set reactivation else { QP_active_size = active_size; QP_Gmax_old = INF; continue; } } QP_Gmax_old = QP_Gmax_new; } if(iter >= max_iter) info("WARNING: reaching max number of inner iterations\n"); delta = 0; w_norm_new = 0; for(j=0; j<w_size; j++) { delta += Grad[j]*(wpd[j]-w[j]); if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } delta += (w_norm_new-w_norm); negsum_xTd = 0; for(int i=0; i<l; i++) if(y[i] == -1) negsum_xTd += C[GETI(i)]*xTd[i]; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { cond = w_norm_new - w_norm + negsum_xTd - sigma*delta; for(int i=0; i<l; i++) { double exp_xTd = exp(xTd[i]); exp_wTx_new[i] = exp_wTx[i]*exp_xTd; cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i])); } if(cond <= 0) { w_norm = w_norm_new; for(j=0; j<w_size; j++) w[j] = wpd[j]; for(int i=0; i<l; i++) { 
exp_wTx[i] = exp_wTx_new[i]; double tau_tmp = 1/(1+exp_wTx[i]); tau[i] = C[GETI(i)]*tau_tmp; D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp; } break; } else { w_norm_new = 0; for(j=0; j<w_size; j++) { wpd[j] = (w[j]+wpd[j])*0.5; if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } delta *= 0.5; negsum_xTd *= 0.5; for(int i=0; i<l; i++) xTd[i] *= 0.5; } } // Recompute some info due to too many line search steps if(num_linesearch >= max_num_linesearch) { for(int i=0; i<l; i++) exp_wTx[i] = 0; for(int i=0; i<w_size; i++) { if(w[i]==0) continue; x = prob_col->x[i]; sparse_operator::axpy(w[i], x, exp_wTx); } for(int i=0; i<l; i++) exp_wTx[i] = exp(exp_wTx[i]); } if(iter == 1) inner_eps *= 0.25; newton_iter++; Gmax_old = Gmax_new; info("iter %3d #CD cycles %d\n", newton_iter, iter); } info("=========================\n"); info("optimization finished, #iter = %d\n", newton_iter); if(newton_iter >= max_newton_iter) info("WARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nnz = 0; for(j=0; j<w_size; j++) if(w[j] != 0) { v += fabs(w[j]); nnz++; } for(j=0; j<l; j++) if(y[j] == 1) v += C[GETI(j)]*log(1+1/exp_wTx[j]); else v += C[GETI(j)]*log(1+exp_wTx[j]); info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); delete [] index; delete [] y; delete [] Hdiag; delete [] Grad; delete [] wpd; delete [] xjneg_sum; delete [] xTd; delete [] exp_wTx; delete [] exp_wTx_new; delete [] tau; delete [] D;}// transpose matrix X from row format to column formatstatic void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col){ int i; int l = prob->l; int n = prob->n; size_t nnz = 0; size_t *col_ptr = new size_t [n+1]; feature_node *x_space; prob_col->l = l; prob_col->n = n; prob_col->y = new double[l]; prob_col->x = new feature_node*[n]; for(i=0; i<l; i++) prob_col->y[i] = prob->y[i]; for(i=0; i<n+1; i++) col_ptr[i] = 0; for(i=0; i<l; i++) { feature_node *x = prob->x[i]; while(x->index != -1) { nnz++; col_ptr[x->index]++; x++; } } for(i=1; i<n+1; i++) col_ptr[i] += col_ptr[i-1] + 1; x_space = new feature_node[nnz+n]; for(i=0; i<n; i++) prob_col->x[i] = &x_space[col_ptr[i]]; for(i=0; i<l; i++) { feature_node *x = prob->x[i]; while(x->index != -1) { int ind = x->index-1; x_space[col_ptr[ind]].index = i+1; // starts from 1 x_space[col_ptr[ind]].value = x->value; col_ptr[ind]++; x++; } } for(i=0; i<n; i++) x_space[col_ptr[i]].index = -1; *x_space_ret = x_space; delete [] col_ptr;}
/* group_classes (the label-grouping helper, as in upstream LIBLINEAR) appears at this point in the full source; its outputs are described at the call site in train() below. */
static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn){ //inner and outer tolerances for TRON double eps = param->eps; double eps_cg = 0.1; if(param->init_sol != NULL) eps_cg = 0.5; int pos = 0; int neg = 0; for(int i=0;i<prob->l;i++) if(prob->y[i] > 0) pos++; neg = prob->l - pos; double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l; function *fun_obj=NULL; //fprintf(stderr, "My test0\n");
switch(param->solver_type) {/** -------------------my modification begin---------------------------*/// case L2R_SVOR:// {// // //fprintf(stderr, "My test1\n");// // if(Cp != Cn)//Cp and Cn are the same in this case Cp = Cn = C// // fprintf(stderr, "ERROR: Cp and Cn should be the same in this case\n");// solve_l2r_svor(prob, w, b, eps, C, para_wl)// break; // } /** -------------------my modification end---------------------------*/ case L2R_LR: { double *C = new double[prob->l]; for(int i = 0; i < prob->l; i++) { if(prob->y[i] > 0) C[i] = Cp; else C[i] = Cn; } fun_obj=new l2r_lr_fun(prob, C); TRON tron_obj(fun_obj, primal_solver_tol, eps_cg); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); delete fun_obj; delete[] C; break; } case L2R_L2LOSS_SVC: { double *C = new double[prob->l]; for(int i = 0; i < prob->l; i++) { if(prob->y[i] > 0) C[i] = Cp; else C[i] = Cn; } fun_obj=new l2r_l2_svc_fun(prob, C); TRON tron_obj(fun_obj, primal_solver_tol, eps_cg); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); delete fun_obj; delete[] C; break; } case L2R_L2LOSS_SVC_DUAL: 
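// Reading note (summary, inferred from the solver bodies above): the L1/L2-loss
// SVC dual solvers dispatched in the next cases share one coordinate-descent
// skeleton: randomly permute the working set, compute the projected gradient of
// one alpha_i, shrink variables stuck at a bound with an outward-pointing
// gradient, and otherwise update alpha_i <- min(max(alpha_i - G/QD[i], 0), U)
// while maintaining w += (alpha_i_new - alpha_i_old)*y_i*x_i.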
solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); break; case L2R_L1LOSS_SVC_DUAL: // solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); solve_l2r_l1l2_svmop(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); break; case L2R_SVMOP: // solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); solve_l2r_l1l2_svmop(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); break; case L1R_L2LOSS_SVC: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; break; } case L1R_LR: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; break; } case L2R_LR_DUAL: solve_l2r_lr_dual(prob, w, eps, Cp, Cn); break; case L2R_L2LOSS_SVR: { double *C = new double[prob->l]; for(int i = 0; i < prob->l; i++) C[i] = param->C; fun_obj=new l2r_l2_svr_fun(prob, C, param->p); TRON tron_obj(fun_obj, param->eps); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); delete fun_obj; delete[] C; break; } case L2R_L1LOSS_SVR_DUAL: solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL); break; case L2R_L2LOSS_SVR_DUAL: solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL); break; default: fprintf(stderr, "ERROR: unknown solver_type\n"); break; }/** -------------------my modification end---------------------------*/ //stop=clock(); //printf("Training Time:%f seconds.\n", (double)(stop-start)/CLOCKS_PER_SEC);/** -------------------my modification end---------------------------*/}// Calculate the initial C for parameter selectionstatic double calc_start_C(const problem *prob, const parameter *param){ int i; double xTx,max_xTx; max_xTx = 0; for(i=0; i<prob->l; i++) { xTx = 0; feature_node *xi=prob->x[i]; while(xi->index != -1) { double val = xi->value; xTx += val*val; xi++; } if(xTx > max_xTx) max_xTx = xTx; } double min_C = 1.0; if(param->solver_type == L2R_LR) min_C = 1.0 / (prob->l * max_xTx); else if(param->solver_type == L2R_L2LOSS_SVC) min_C = 1.0 / (2 * prob->l * max_xTx); return pow( 2, floor(log(min_C) / log(2.0)) );}//// Interface functions//model* train(const problem *prob, const parameter *param){ int i,j; int l = prob->l; int n = prob->n; int w_size = prob->n; model *model_ = Malloc(model,1); if(prob->bias>=0) model_->nr_feature=n-1; else model_->nr_feature=n; model_->param = *param; model_->bias = prob->bias; if(check_regression_model(model_)) { int nr_class; int *label = NULL; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); model_->w = Malloc(double, w_size); group_classes(prob,&nr_class,&label,&start,&count,perm); for(i=0; i<w_size; i++) model_->w[i] = 0; model_->nr_class=nr_class; model_->label = Malloc(int,nr_class); for(i=0;i<nr_class;i++) model_->label[i] = label[i]; for(i=0;i<nr_class-1;i++) for(j=i+1;j< nr_class;j++) if(model_->label[i] > model_->label[j]) swap(model_->label[i],model_->label[j]); problem sub_prob; sub_prob.l = l; sub_prob.n = n; sub_prob.x = Malloc(feature_node *,sub_prob.l); sub_prob.y = Malloc(double,sub_prob.l); for(int k=0; k<sub_prob.l; k++) {sub_prob.x[k] = prob->x[perm[k]]; sub_prob.y[k] = (prob->y[perm[k]]-1)/(nr_class -1) - 0.5;} train_one(&sub_prob, param, model_->w, 0, 0); } else { int nr_class; int *label = NULL; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); // group training data of the same class 
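// Assumed contract of group_classes, inferred from how its outputs are used
// below: label[] receives the nr_class distinct labels, start[]/count[] the
// offset and size of each class block, and perm[] a permutation of 0..l-1 that
// places instances of the same class contiguously, so the subproblem can be
// built as sub_prob.x[i] = prob->x[perm[i]].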
group_classes(prob,&nr_class,&label,&start,&count,perm); model_->nr_class=nr_class; model_->label = Malloc(int,nr_class); for(i=0;i<nr_class;i++) model_->label[i] = label[i]; // calculate weighted C double *weighted_C = Malloc(double, nr_class); for(i=0;i<nr_class;i++) weighted_C[i] = param->C; for(i=0;i<param->nr_weight;i++) { for(j=0;j<nr_class;j++) if(param->weight_label[i] == label[j]) break; if(j == nr_class) fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); else weighted_C[j] *= param->weight[i]; } // constructing the subproblem feature_node **x = Malloc(feature_node *,l); for(i=0;i<l;i++) x[i] = prob->x[perm[i]]; int k; problem sub_prob; sub_prob.l = l; sub_prob.n = n; sub_prob.x = Malloc(feature_node *,sub_prob.l); sub_prob.y = Malloc(double,sub_prob.l); for(k=0; k<sub_prob.l; k++) sub_prob.x[k] = x[k]; // multi-class svm by Crammer and Singer if(param->solver_type == MCSVM_CS) { model_->w=Malloc(double, n*nr_class); for(i=0;i<nr_class;i++) for(j=start[i];j<start[i]+count[i];j++) sub_prob.y[j] = i; Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps); Solver.Solve(model_->w); } else if(nr_class == 2 && model_->param.solver_type!= L2R_SVOR && model_->param.solver_type != L2R_NPSVOR && model_->param.solver_type!= L2R_SVMOP) /*My modification*/ { model_->w=Malloc(double, w_size); int e0 = start[0]+count[0]; k=0; for(; k<e0; k++) sub_prob.y[k] = +1; for(; k<sub_prob.l; k++) sub_prob.y[k] = -1; if(param->init_sol != NULL) for(i=0;i<w_size;i++) model_->w[i] = param->init_sol[i]; else for(i=0;i<w_size;i++) model_->w[i] = 0; train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]); free(sub_prob.y); } else if(model_->param.solver_type== L2R_SVOR) //My modification begin { // if(nr_class == 2) // for(i=0;i<prob->l;i++) // if(subprob->y[i]== model_->label[1]) // subprob->y[i]==1; // else // subprob->y[i]= 2; info("%d %d\n",model_->label[0],model_->label[1]); model_->w=Malloc(double, w_size); model_->b=Malloc(double, nr_class-1); //add for(i=0; i< l; i++) sub_prob.y[i] = prob->y[perm[i]]; for(i=0;i<nr_class-1;i++) //add model_->b[i] = 0; //add if(param->init_sol != NULL) {for(i=0;i<w_size;i++) model_->w[i] = param->init_sol[i]; } else { for(i=0;i<w_size;i++) model_->w[i] = 0; } info("%g\n",param->svor); for(i=0;i<nr_class-1;i++) for(j=i+1;j< nr_class;j++) if(model_->label[i] > model_->label[j]) swap(model_->label[i],model_->label[j]); if(param->svor==1){ solve_l2r_svor(&sub_prob, param, model_->w, model_->b, model_->label, nr_class); } else if(param->svor==2) solve_l2r_svor_admm(&sub_prob, param, model_->w, model_->b, nr_class); else if(param->svor==3) {solve_l2r_svor_full(&sub_prob, param, model_->w, model_->b, model_->label, nr_class);} for(i=0;i<nr_class-1;i++) info("b %.6f\n",model_->b[i]); } // My modification end else if (model_->param.solver_type== L2R_NPSVOR) { for(i=0; i< l; i++) sub_prob.y[i] = prob->y[perm[i]]; for(i=0;i<nr_class-1;i++) for(j=i+1;j< nr_class;j++) if(model_->label[i] > model_->label[j]) swap(model_->label[i],model_->label[j]); model_->w=Malloc(double, w_size*nr_class); double *w=Malloc(double, w_size); for(i=0;i<nr_class;i++) { if(param->init_sol != NULL) for(j=0;j<w_size;j++) w[j] = param->init_sol[j*nr_class+i]; else for(j=0;j<w_size;j++) w[j] = 0; if(param->npsvor==1){ solve_l2r_npsvor(&sub_prob, w, param, model_->label[i]); //ramp_npsvor(&sub_prob, w, param, model_->label[i]); } else if(param->npsvor==2) { int nk = 0; for(j=0;j<sub_prob.l;j++) if(sub_prob.y[j]== model_->label[i]) 
nk++; solve_l2r_npsvor_full(&sub_prob, w, param, model_->label[i],nk); } else if(param->npsvor==3) { //solve_l2r_npsvor(&sub_prob, w, param, model_->label[i]); ramp_npsvor(&sub_prob, w, param, model_->label[i]); } else if(param->npsvor==4) { //solve_l2r_npsvor(&sub_prob, w, param, model_->label[i]); double *w1 = new double[w_size]; double *w2 = new double[w_size]; double *QD = new double[l]; memset(w1,0,sizeof(double)*w_size); memset(w2,0,sizeof(double)*w_size); memset(QD,0,sizeof(double)*l); if(i<nr_class-1) { for(j=0; j<l; j++) { feature_node * const xi = sub_prob.x[j]; QD[j] = sparse_operator::nrm2_sq(xi); } for(j=0; j< l; j++) if(prob->y[perm[j]]>model_->label[i]) sub_prob.y[j] = 1; else sub_prob.y[j] = -1; solve_l2r_npsvor_two(&sub_prob, w1, param,QD); for(j=0; j< l; j++) if(prob->y[perm[j]] > model_->label[i]) sub_prob.y[j] = -1; else sub_prob.y[j] = 1; solve_l2r_npsvor_two(&sub_prob, w2, param,QD); } for(j=0;j<w_size;j++) w[j] = w1[j]-w2[j]; } for(int j=0;j<w_size;j++) model_->w[j*nr_class+i] = w[j]; } } else if (model_->param.solver_type== L2R_SVMOP) { // for(i=0; i< l; i++) // sub_prob.y[i] = prob->y[perm[i]]; for(i=0;i<nr_class-1;i++) for(j=i+1;j< nr_class;j++) if(model_->label[i] > model_->label[j]) swap(model_->label[i],model_->label[j]); model_->w=Malloc(double, w_size*(nr_class-1)); double *w=Malloc(double, w_size); for(i=0;i<nr_class-1;i++) { for(int s=0;s<prob->l;s++) if(prob->y[perm[s]]> model_->label[i]) sub_prob.y[s]=1; else sub_prob.y[s]= -1; if(param->init_sol != NULL) for(j=0;j<w_size;j++) w[j] = param->init_sol[j*(nr_class-1)+i]; else for(j=0;j<w_size;j++) w[j] = 0; train_one(&sub_prob, param, w, weighted_C[i], param->C); for(int j=0;j<w_size;j++) { model_->w[j*(nr_class-1)+i] = w[j]; } } free(w); free(sub_prob.y); } else { model_->w=Malloc(double, w_size*nr_class); double *w=Malloc(double, w_size); for(i=0;i<nr_class;i++) { int si = start[i]; int ei = si+count[i]; k=0; for(; k<si; k++) sub_prob.y[k] = -1; for(; k<ei; k++) sub_prob.y[k] = +1; for(; k<sub_prob.l; k++) sub_prob.y[k] = -1; if(param->init_sol != NULL) for(j=0;j<w_size;j++) w[j] = param->init_sol[j*nr_class+i]; else for(j=0;j<w_size;j++) w[j] = 0; train_one(&sub_prob, param, w, weighted_C[i], param->C); for(int j=0;j<w_size;j++) model_->w[j*nr_class+i] = w[j]; } free(w); free(sub_prob.y); } // info("sssssss%.6f\n",model_->w[1]); free(x); free(label); free(start); free(count); free(perm); free(sub_prob.x); // free(sub_prob.y); free(weighted_C); } // info("sssssss%.6f\n",model_->w[1]); // info("b %.6f\n",model_->b[0]); return model_;}void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target){ // printf("%g\n",param->C); int i; int *fold_start; int l = prob->l; int *perm = Malloc(int,l); if (nr_fold > l) { nr_fold = l; fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); } fold_start = Malloc(int,nr_fold+1); for(i=0;i<l;i++) perm[i]=i; for(i=0;i<l;i++) { int j = i+rand()%(l-i); swap(perm[i],perm[j]); } for(i=0;i<=nr_fold;i++) fold_start[i]=i*l/nr_fold; for(i=0;i<nr_fold;i++) { int begin = fold_start[i]; int end = fold_start[i+1]; int j,k; struct problem subprob; subprob.bias = prob->bias; subprob.n = prob->n; subprob.l = l-(end-begin); subprob.x = Malloc(struct feature_node*,subprob.l); subprob.y = Malloc(double,subprob.l); k=0; for(j=0;j<begin;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;j<l;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct model *submodel = train(&subprob,param); for(j=begin;j<end;j++) target[perm[j]] = predict(submodel,prob->x[perm[j]]); free_and_destroy_model(&submodel); free(subprob.x); free(subprob.y); } free(fold_start); free(perm);}void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_acc_rate, double *best_mae_rate){ // variables for CV int i; int *fold_start; int l = prob->l; int *perm = Malloc(int, l); double *target = Malloc(double, prob->l); struct problem *subprob = Malloc(problem,nr_fold); // variables for warm start double ratio = 2; double **prev_w = Malloc(double*, nr_fold); for(i = 0; i < nr_fold; i++) prev_w[i] = NULL; int num_unchanged_w = 0; struct parameter param1 = *param; void (*default_print_string) (const char *) = liblinear_print_string; if (nr_fold > l) { nr_fold = l; fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); } fold_start = Malloc(int,nr_fold+1); for(i=0;i<l;i++) perm[i]=i; for(i=0;i<l;i++) { int j = i+rand()%(l-i); swap(perm[i],perm[j]); } for(i=0;i<=nr_fold;i++) fold_start[i]=i*l/nr_fold; for(i=0;i<nr_fold;i++) { int begin = fold_start[i]; int end = fold_start[i+1]; int j,k; subprob[i].bias = prob->bias; subprob[i].n = prob->n; subprob[i].l = l-(end-begin); subprob[i].x = Malloc(struct feature_node*,subprob[i].l); subprob[i].y = Malloc(double,subprob[i].l); k=0; for(j=0;j<begin;j++) { subprob[i].x[k] = prob->x[perm[j]]; subprob[i].y[k] = prob->y[perm[j]]; ++k; } for(j=end;j<l;j++) { subprob[i].x[k] = prob->x[perm[j]]; subprob[i].y[k] = prob->y[perm[j]]; ++k; } } *best_acc_rate = 0; *best_mae_rate = INF; if(start_C <= 0) start_C = calc_start_C(prob,param); param1.C = start_C; while(param1.C <= max_C) { //Output disabled for running CV at a particular C set_print_string_function(&print_null); if(param->solver_type == L2R_NPSVOR) { param1.C1 = param1.C; param1.C2 = param1.C; } for(i=0; i<nr_fold; i++) { int j; int begin = fold_start[i]; int end = fold_start[i+1]; param1.init_sol = prev_w[i]; struct model *submodel = train(&subprob[i],&param1); int total_w_size; if((submodel->nr_class == 2 && submodel->param.solver_type != L2R_NPSVOR)||submodel->param.solver_type == L2R_SVOR||check_regression_model(submodel)) total_w_size = subprob[i].n; else if(submodel->param.solver_type == L2R_SVMOP) total_w_size = subprob[i].n * (submodel->nr_class-1); else total_w_size = subprob[i].n * submodel->nr_class; if(prev_w[i] == NULL) { prev_w[i] = Malloc(double, total_w_size); for(j=0; j<total_w_size; j++) prev_w[i][j] = submodel->w[j]; } else if(num_unchanged_w >= 0) { double norm_w_diff = 0; for(j=0; j<total_w_size; j++) { norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]); prev_w[i][j] = 
submodel->w[j]; } norm_w_diff = sqrt(norm_w_diff); if(norm_w_diff > 1e-15) num_unchanged_w = -1; } else { for(j=0; j<total_w_size; j++) prev_w[i][j] = submodel->w[j]; } for(j=begin; j<end; j++) target[perm[j]] = predict(submodel,prob->x[perm[j]]); free_and_destroy_model(&submodel); } set_print_string_function(default_print_string); int total_correct = 0; double total_abserror=0; for(i=0; i<prob->l; i++) {if(target[i] == prob->y[i]) ++total_correct; total_abserror += fabs(target[i] - prob->y[i]); } double current_mae_rate = (double)total_abserror/prob->l; double current_acc_rate = (double)total_correct/prob->l; if(current_mae_rate < *best_mae_rate) { *best_C = param1.C; *best_mae_rate = current_mae_rate; *best_acc_rate = current_acc_rate; } info("log2c=%7.2f\tacc_rate=%.6f\tmae_rate=%.6f\n",log(param1.C)/log(2.0),current_acc_rate,current_mae_rate); num_unchanged_w++; if(num_unchanged_w == 3) break; param1.C = param1.C*ratio; } if(param1.C > max_C && max_C > start_C) info("warning: maximum C reached.\n"); free(fold_start); free(perm); free(target); for(i=0; i<nr_fold; i++) { free(subprob[i].x); free(subprob[i].y); free(prev_w[i]); } free(prev_w); free(subprob);}void find_parameter_npsvor(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C1, double *best_C2, double *best_acc_rate, double *best_mae_rate){ // variables for CV int i; int *fold_start; int l = prob->l; int *perm = Malloc(int, l); double *target = Malloc(double, prob->l); struct problem *subprob = Malloc(problem,nr_fold); // variables for warm start double ratio = 2; double **prev_w = Malloc(double*, nr_fold); for(i = 0; i < nr_fold; i++) prev_w[i] = NULL; int num_unchanged_w = 0; struct parameter param1 = *param; void (*default_print_string) (const char *) = liblinear_print_string; if (nr_fold > l) { nr_fold = l; fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); } fold_start = Malloc(int,nr_fold+1); for(i=0;i<l;i++) perm[i]=i; for(i=0;i<l;i++) { int j = i+rand()%(l-i); swap(perm[i],perm[j]); } for(i=0;i<=nr_fold;i++) fold_start[i]=i*l/nr_fold; for(i=0;i<nr_fold;i++) { int begin = fold_start[i]; int end = fold_start[i+1]; int j,k; subprob[i].bias = prob->bias; subprob[i].n = prob->n; subprob[i].l = l-(end-begin); subprob[i].x = Malloc(struct feature_node*,subprob[i].l); subprob[i].y = Malloc(double,subprob[i].l); k=0; for(j=0;j<begin;j++) { subprob[i].x[k] = prob->x[perm[j]]; subprob[i].y[k] = prob->y[perm[j]]; ++k; } for(j=end;j<l;j++) { subprob[i].x[k] = prob->x[perm[j]]; subprob[i].y[k] = prob->y[perm[j]]; ++k; } } *best_acc_rate = 0; *best_mae_rate = INF; if(start_C <= 0) start_C = calc_start_C(prob,param); param1.C1 = start_C; while(param1.C1 <= max_C) { param1.C2 = start_C; while(param1.C2 <= max_C) { //Output disabled for running CV at a particular C set_print_string_function(&print_null); for(i=0; i<nr_fold; i++) { int j; int begin = fold_start[i]; int end = fold_start[i+1]; param1.init_sol = prev_w[i]; struct model *submodel = train(&subprob[i],&param1); int total_w_size; if(submodel->nr_class == 2 && submodel->param.solver_type != L2R_NPSVOR) total_w_size = subprob[i].n; else total_w_size = subprob[i].n * submodel->nr_class; if(prev_w[i] == NULL) { prev_w[i] = Malloc(double, total_w_size); for(j=0; j<total_w_size; j++) prev_w[i][j] = submodel->w[j]; } else if(num_unchanged_w >= 0) { double norm_w_diff = 0; for(j=0; j<total_w_size; j++) { norm_w_diff += (submodel->w[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]); prev_w[i][j] = submodel->w[j]; } norm_w_diff = sqrt(norm_w_diff); if(norm_w_diff > 1e-15) num_unchanged_w = -1; } else { for(j=0; j<total_w_size; j++) prev_w[i][j] = submodel->w[j]; } for(j=begin; j<end; j++) target[perm[j]] = predict(submodel,prob->x[perm[j]]); free_and_destroy_model(&submodel); } set_print_string_function(default_print_string); int total_correct = 0; double total_abserror=0; for(i=0; i<prob->l; i++) {if(target[i] == prob->y[i]) ++total_correct; total_abserror += fabs(target[i] - prob->y[i]); } double current_mae_rate = (double)total_abserror/prob->l; double current_acc_rate = (double)total_correct/prob->l; if(current_mae_rate < *best_mae_rate) { *best_C1 = param1.C1; *best_C2 = param1.C2; *best_mae_rate = current_mae_rate; *best_acc_rate = current_acc_rate; } info("log2c1=%7.2f log2c2=%7.2f\tacc_rate=%g\tmae_rate=%g\n",log(param1.C1)/log(2.0), log(param1.C2)/log(2.0),current_acc_rate,current_mae_rate); num_unchanged_w++; if(num_unchanged_w == 3) break; param1.C2 = param1.C2*ratio; } param1.C1 = param1.C1*ratio; } if((param1.C1 > max_C||param1.C2 > max_C) && max_C > start_C) info("warning: maximum C reached.\n"); free(fold_start); free(perm); free(target); for(i=0; i<nr_fold; i++) { free(subprob[i].x); free(subprob[i].y); free(prev_w[i]); } free(prev_w); free(subprob);}double predict_values(const struct model *model_, const struct feature_node *x, double *dec_values){ int idx; int n; if(model_->bias>=0) n=model_->nr_feature+1; else n=model_->nr_feature; double *w=model_->w; int nr_class=model_->nr_class; int i; int nr_w; if((nr_class==2 && model_->param.solver_type != MCSVM_CS && model_->param.solver_type != L2R_NPSVOR)||model_->param.solver_type == L2R_SVOR||check_regression_model(model_)) nr_w = 1; else if(model_->param.solver_type == L2R_SVMOP) nr_w = nr_class-1; else nr_w = nr_class; const feature_node *lx=x; for(i=0;i<nr_w;i++) dec_values[i] = 0; for(; (idx=lx->index)!=-1; 
lx++) { // the dimension of testing data may exceed that of training if(idx<=n) for(i=0;i<nr_w;i++) dec_values[i] += w[(idx-1)*nr_w+i]*lx->value; } if(check_regression_model(model_)) for(i=0;i<nr_w;i++) dec_values[i] = (dec_values[i]+0.5)*(nr_class-1)+1; if(nr_class==2 && model_->param.solver_type!= L2R_SVOR && model_->param.solver_type != L2R_NPSVOR && model_->param.solver_type != L2R_SVMOP) { if(check_regression_model(model_)) return (fabs(dec_values[0]-model_->label[0])<fabs(dec_values[0]-model_->label[1]))?model_->label[0]:model_->label[1]; else return (dec_values[0]>0)?model_->label[0]:model_->label[1]; } else if (model_->param.solver_type == L2R_SVOR)// My modification begin { int count = 1; for(int j=0;j<nr_class-1;j++) { if (dec_values[0] + model_->b[j]>0) count = count +1; } return model_->label[count-1]; } // My modification end else if (model_->param.solver_type == L2R_NPSVOR && model_->param.npsvor!=4) { //******************************************predict method 1 // int dec_min_idx = 0; // for(i=1;i<nr_class;i++) // { // if(fabs(dec_values[i]) < fabs(dec_values[dec_min_idx])) // dec_min_idx = i; // } // return model_->label[dec_min_idx]; //******************************************predict method 2 int count = 1; for(int j=0;j<nr_class-1;j++) { // if (dec_values[j+1]+dec_values[j]>0) // {count += 1;} if (dec_values[j+1]+dec_values[j]>0) count += 1; } return model_->label[count-1]; // int count = 1; // for(int j=0;j<nr_class-1;j++) // { // // if (dec_values[j+1]+dec_values[j]>0) // // {count += 1;} // if (dec_values[j+1]+dec_values[j]>0) // count = max(count,j+2); // } // return model_->label[count-1]; //******************************************predict method 3 // double *dec = Malloc(double, nr_class*(nr_class+1)/2); // int *vote = Malloc(int,nr_class); // for(i=0;i<nr_class;i++) // vote[i] = 0; // int p=0; // for(i=0;i<nr_class;i++) // for(int j=i+1;j<nr_class;j++) // { // dec[p] = dec_values[j] + dec_values[i]; // if(dec[p] > 0) // ++vote[j]; // else // ++vote[i]; // p++; // } // int vote_max_idx = 0; // for(i=1;i<nr_class;i++) // if(vote[i] > vote[vote_max_idx]) // vote_max_idx = i; // free(vote); // return model_->label[vote_max_idx]; } else if(check_regression_model(model_)) { int dec_min_idx = 0; for(i=1;i<nr_class;i++) { if(fabs(dec_values[0]-model_->label[i]) < fabs(dec_values[0]-model_->label[dec_min_idx])) dec_min_idx = i; } return model_->label[dec_min_idx]; } else if (model_->param.solver_type == L2R_SVMOP || (model_->param.solver_type == L2R_NPSVOR && model_->param.npsvor==4)) { // int dec_max_idx = 0; // double *prob_estimates = Malloc(double, model_->nr_class); // for(i=0;i<nr_class-1;i++) // dec_values[i]=1/(1+exp(-dec_values[i])); // // double sum=0; // // for(i=0; i<nr_class-1; i++) // // sum+=dec_values[i]; // // for(i=0; i<nr_class-1; i++) // // dec_values[i]=dec_values[i]/sum; // for(i=0;i<nr_class;i++) // { // if(i==0) // prob_estimates[i] = 1-dec_values[0]; // else if(i==nr_class-1) // prob_estimates[i] = dec_values[nr_class-1]; // else // prob_estimates[i] = dec_values[i-1]-dec_values[i]; // } // // for(i=0;i<nr_class-1;i++) // // {info("%.3f ",dec_values[i]);} // for(i=1;i<nr_class;i++) // { // if(prob_estimates[i] > prob_estimates[dec_max_idx]) // dec_max_idx = i; // } // return model_->label[dec_max_idx]; //******************************************predict method 3 // int dec_max_idx = 0; // for(i=0;i<nr_w;i++) // dec_values[i]=1/(1+exp(-dec_values[i])); // double *prob_estimates = Malloc(double, model_->nr_class); // if(nr_class==2) // for binary 
classification // { // prob_estimates[0]=1.- dec_values[0]; // prob_estimates[1] = dec_values[0]; // } // else // { // double sum=0; // for(i=0; i<nr_w; i++) // sum += dec_values[i]; // for(i=0; i<nr_w; i++) // dec_values[i]=dec_values[i]/sum; // // info("%.3f ", dec_values[0]); // for(i=0;i<nr_class;i++) // { // if(i==0) // prob_estimates[i] = 1-dec_values[0]; // else if(i==nr_class-1) // prob_estimates[i] = dec_values[nr_w-1]; // else // prob_estimates[i] = dec_values[i-1] - dec_values[i]; // } // } // // for(i=0;i<nr_class;i++) // // {info("%.3f ", prob_estimates[i]);} // for(i=1;i<nr_class;i++) // { // if(prob_estimates[i] > prob_estimates[dec_max_idx]) // dec_max_idx = i; // } // return model_->label[dec_max_idx]; //******************************************predict method 3 int count = 1; for(int j=0;j<nr_class-1;j++) { if (dec_values[j]>0) count += 1; } return model_->label[count-1]; } else { int dec_max_idx = 0; for(i=1;i<nr_class;i++) { if(dec_values[i] > dec_values[dec_max_idx]) dec_max_idx = i; } return model_->label[dec_max_idx]; }}double predict(const model *model_, const feature_node *x){ double *dec_values = Malloc(double, model_->nr_class); double label=predict_values(model_, x, dec_values); free(dec_values); return label;}double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates){ if(check_probability_model(model_)) { int i; int nr_class=model_->nr_class; int nr_w; if(nr_class==2) nr_w = 1; else nr_w = nr_class; double label=predict_values(model_, x, prob_estimates); for(i=0;i<nr_w;i++) prob_estimates[i]=1/(1+exp(-prob_estimates[i])); if(nr_class==2) // for binary classification prob_estimates[1]=1.-prob_estimates[0]; else { double sum=0; for(i=0; i<nr_class; i++) sum+=prob_estimates[i]; for(i=0; i<nr_class; i++) prob_estimates[i]=prob_estimates[i]/sum; } return label; } else return 0;}static const char *solver_type_table[]={ "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", "L2R_SVOR", "L2R_NPSVOR", "L2R_SVMOP",//My modification "L2R_L2LOSS_SVR", "L2R_L2LOSS_SVR_DUAL", "L2R_L1LOSS_SVR_DUAL", NULL};int save_model(const char *model_file_name, const struct model *model_){ int i; int nr_feature=model_->nr_feature; int n; const parameter& param = model_->param; if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; int w_size = n; FILE *fp = fopen(model_file_name,"w"); if(fp==NULL) return -1; char *old_locale = setlocale(LC_ALL, NULL); if (old_locale) { old_locale = strdup(old_locale); } setlocale(LC_ALL, "C"); int nr_w; if((model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) ||model_->param.solver_type ==L2R_SVOR||check_regression_model(model_)) nr_w=1; else if(model_->param.solver_type ==L2R_SVMOP) nr_w = model_->nr_class -1; else nr_w=model_->nr_class; fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); fprintf(fp, "nr_class %d\n", model_->nr_class); if(model_->label) { fprintf(fp, "label"); for(i=0; i<model_->nr_class; i++) fprintf(fp, " %d", model_->label[i]); fprintf(fp, "\n"); } fprintf(fp, "nr_feature %d\n", nr_feature); fprintf(fp, "bias %.16g\n", model_->bias); fprintf(fp, "w\n"); for(i=0; i<w_size; i++) { int j; for(j=0; j<nr_w; j++) fprintf(fp, "%.16g ", model_->w[i*nr_w+j]); fprintf(fp, "\n"); } if(model_->param.solver_type==L2R_SVOR) { fprintf(fp, "b\n"); for(i=0; i<model_->nr_class-1; i++) { fprintf(fp, "%.16g ", model_->b[i]); fprintf(fp, "\n"); } } setlocale(LC_ALL, old_locale); free(old_locale); if 
//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity when
//	if (...)
//		FSCANF();
// is used
//
#define FSCANF(_stream, _format, _var)do\
{\
	if (fscanf(_stream, _format, _var) != 1)\
	{\
		fprintf(stderr, "ERROR: fscanf failed to read the model\n");\
		EXIT_LOAD_MODEL()\
	}\
}while(0)
// EXIT_LOAD_MODEL should NOT end with a semicolon.
#define EXIT_LOAD_MODEL()\
{\
	setlocale(LC_ALL, old_locale);\
	free(model_->label);\
	free(model_);\
	free(old_locale);\
	return NULL;\
}

struct model *load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"r");
	if(fp==NULL) return NULL;

	int i;
	int nr_feature;
	int n;
	int nr_class;
	double bias;
	model *model_ = Malloc(model,1);
	parameter& param = model_->param;

	model_->label = NULL;

	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale)
	{
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	char cmd[81];
	while(1)
	{
		FSCANF(fp,"%80s",cmd);
		if(strcmp(cmd,"solver_type")==0)
		{
			FSCANF(fp,"%80s",cmd);
			int i;
			for(i=0;solver_type_table[i];i++)
			{
				if(strcmp(solver_type_table[i],cmd)==0)
				{
					param.solver_type=i;
					break;
				}
			}
			if(solver_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown solver type.\n");
				EXIT_LOAD_MODEL()
			}
		}
		else if(strcmp(cmd,"nr_class")==0)
		{
			FSCANF(fp,"%d",&nr_class);
			model_->nr_class=nr_class;
		}
		else if(strcmp(cmd,"nr_feature")==0)
		{
			FSCANF(fp,"%d",&nr_feature);
			model_->nr_feature=nr_feature;
		}
		else if(strcmp(cmd,"bias")==0)
		{
			FSCANF(fp,"%lf",&bias);
			model_->bias=bias;
		}
		else if(strcmp(cmd,"w")==0)
		{
			break;
		}
		else if(strcmp(cmd,"label")==0)
		{
			int nr_class = model_->nr_class;
			model_->label = Malloc(int,nr_class);
			for(int i=0;i<nr_class;i++)
				FSCANF(fp,"%d",&model_->label[i]);
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			EXIT_LOAD_MODEL()
		}
	}

	nr_feature=model_->nr_feature;
	if(model_->bias>=0)
		n=nr_feature+1;
	else
		n=nr_feature;
	int w_size = n;
	int nr_w;
	if((nr_class==2 && param.solver_type != MCSVM_CS)
		|| param.solver_type == L2R_SVOR || check_regression_model(model_))
		nr_w = 1;
	else if(param.solver_type == L2R_SVMOP)
		nr_w = nr_class - 1;
	else
		nr_w = nr_class;

	model_->w=Malloc(double, w_size*nr_w);
	for(i=0; i<w_size; i++)
	{
		int j;
		for(j=0; j<nr_w; j++)
			FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]);
		if (fscanf(fp, "\n") !=0)
		{
			fprintf(stderr, "ERROR: fscanf failed to read the model\n");
			EXIT_LOAD_MODEL()
		}
	}

	if(param.solver_type == L2R_SVOR)
	{
		// read back the SVOR thresholds saved under the "b" tag
		FSCANF(fp,"%80s",cmd);
		if(strcmp(cmd,"b")==0)
		{
			model_->b=Malloc(double, nr_class-1);
			for(i=0; i<nr_class-1; i++)
				FSCANF(fp, "%lf ", &model_->b[i]);
		}
	}

	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0)
		return NULL;

	return model_;
}

int get_nr_feature(const model *model_)
{
	return model_->nr_feature;
}

int get_nr_class(const model *model_)
{
	return model_->nr_class;
}

void get_labels(const model *model_, int* label)
{
	if (model_->label != NULL)
		for(int i=0;i<model_->nr_class;i++)
			label[i] = model_->label[i];
}

// use inline here for better performance (around 20% faster than the non-inline one)
static inline double get_w_value(const struct model *model_, int idx, int label_idx)
{
	int nr_class = model_->nr_class;
	int solver_type = model_->param.solver_type;
	const double *w = model_->w;

	if(idx < 0 || idx > model_->nr_feature)
		return 0;
	if(check_regression_model(model_))
		return w[idx];
	else
	{
		if(label_idx < 0 || label_idx >= nr_class)
			return 0;
		if((nr_class==2 && solver_type != MCSVM_CS && solver_type != L2R_NPSVOR)
			|| solver_type == L2R_SVOR) // add: SVOR keeps a single weight vector
		{
			if(label_idx == 0)
				return w[idx];
			else
				return -w[idx];
		}
		else if(solver_type == L2R_SVMOP)
			return w[idx*(nr_class-1)+label_idx];
		else
			return w[idx*nr_class+label_idx];
	}
}

// feat_idx: starting from 1 to nr_feature
// label_idx: starting from 0 to nr_class-1 for classification models;
// for regression models, label_idx is ignored.
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx)
{
	if(feat_idx > model_->nr_feature)
		return 0;
	return get_w_value(model_, feat_idx-1, label_idx);
}

double get_decfun_bias(const struct model *model_, int label_idx)
{
	int bias_idx = model_->nr_feature;
	double bias = model_->bias;
	if(bias <= 0)
		return 0;
	else
		return bias*get_w_value(model_, bias_idx, label_idx);
}

void free_model_content(struct model *model_ptr)
{
	if(model_ptr->w != NULL)
		free(model_ptr->w);
	if(model_ptr->label != NULL)
		free(model_ptr->label);
	// note: the SVOR threshold array model_ptr->b is not freed here; it is only
	// allocated for L2R_SVOR models and is never NULL-initialized elsewhere
}

void free_and_destroy_model(struct model **model_ptr_ptr)
{
	struct model *model_ptr = *model_ptr_ptr;
	if(model_ptr != NULL)
	{
		free_model_content(model_ptr);
		free(model_ptr);
	}
}

void destroy_param(parameter* param)
{
	if(param->weight_label != NULL)
		free(param->weight_label);
	if(param->weight != NULL)
		free(param->weight);
	if(param->init_sol != NULL)
		free(param->init_sol);
}

const char *check_parameter(const problem *prob, const parameter *param)
{
	if(param->eps <= 0)
		return "eps <= 0";

	if(param->C <= 0)
		return "C <= 0";

	if(param->p < 0)
		return "p < 0";

	if(param->solver_type != L2R_LR
		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
		&& param->solver_type != L2R_L2LOSS_SVC
		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
		&& param->solver_type != MCSVM_CS
		&& param->solver_type != L1R_L2LOSS_SVC
		&& param->solver_type != L1R_LR
		&& param->solver_type != L2R_LR_DUAL
		&& param->solver_type != L2R_SVOR //my modification
		&& param->solver_type != L2R_NPSVOR
		&& param->solver_type != L2R_SVMOP
		&& param->solver_type != L2R_L2LOSS_SVR
		&& param->solver_type != L2R_L2LOSS_SVR_DUAL
		&& param->solver_type != L2R_L1LOSS_SVR_DUAL)
		return "unknown solver type";

	if(param->init_sol != NULL
		&& param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC)
		return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC";

	return NULL;
}

int check_probability_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_LR ||
			model_->param.solver_type==L2R_LR_DUAL ||
			model_->param.solver_type==L1R_LR);
}

int check_regression_model(const struct model *model_)
{
	return (model_->param.solver_type==L2R_L2LOSS_SVR ||
			model_->param.solver_type==L2R_L1LOSS_SVR_DUAL ||
			model_->param.solver_type==L2R_L2LOSS_SVR_DUAL);
}

void set_print_string_function(void (*print_func)(const char*))
{
	if (print_func == NULL)
		liblinear_print_string = &print_string_stdout;
	else
		liblinear_print_string = print_func;
}
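For orientation, here is a minimal stand-alone driver sketch (not part of the library) showing how the prediction API above is meant to be called. The model path "svor.model" and the feature values are hypothetical.

/*
 * predict_example.c -- a minimal, hypothetical driver for the API above.
 */
#include <stdio.h>
#include "linear.h"

int main(void)
{
	struct model *m = load_model("svor.model"); /* hypothetical path */
	if(m == NULL)
	{
		fprintf(stderr, "cannot load model\n");
		return 1;
	}

	/* sparse instance x1=0.5, x3=1.2, terminated by index -1;
	 * if the model was trained with a bias term (bias >= 0), one extra
	 * node (index nr_feature+1, value = bias) must be appended first */
	struct feature_node x[] = { {1, 0.5}, {3, 1.2}, {-1, 0.0} };

	printf("predicted label: %g\n", predict(m, x));

	free_and_destroy_model(&m);
	return 0;
}

For an L2R_SVOR model this goes through the threshold-counting branch of predict_values; for the other solvers the same call transparently uses their respective decision rules.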
linear.h
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

struct feature_node
{
	int index;
	double value;
};

struct problem
{
	int l, n;
	double *y;
	struct feature_node **x;
	double bias;            /* < 0 if no bias term */
};

enum {
	L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS,
	L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL,
	L2R_SVOR, L2R_NPSVOR, L2R_SVMOP, //my modification
	L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL
}; /* solver_type */

struct parameter
{
	int solver_type;

	/* these are for training only */
	double eps;             /* stopping criteria */
	double C;
	int nr_weight;
	int *weight_label;
	double* weight;
	double p;
	double *init_sol;

	/* ------------------- my modification --------------------------- */
	double rho;             /* ADMM parameter for SVOR */
	double wl;              /* power of the |k-y| weighted loss */
	double svor;            /* algorithm selector for SVOR */
	double C1;              /* NPSVOR cost C1 */
	double C2;              /* NPSVOR cost C2 */
	double npsvor;          /* NPSVOR variant; 4 switches to SVMOP-style prediction */
	double g;               /* grid-search mode for C1/C2 */
	/* ------------------- my modification --------------------------- */
};

struct model
{
	struct parameter param;
	int nr_class;           /* number of classes */
	int nr_feature;
	double *w;
	int *label;             /* label of each class */
	double bias;
	double *b;              /* My modification: SVOR thresholds */
};

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_acc_rate, double *best_mae_rate);
void find_parameter_npsvor(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C1, double *best_C2, double *best_acc_rate, double *best_mae_rate);

double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
double get_decfun_bias(const struct model *model_, int label_idx);

void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);

const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
int check_regression_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */
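Likewise, a hedged sketch of how the extended parameter block might be filled to train the NPSVOR solver directly through this API. The toy data is made up, and the values chosen for rho, wl, svor, and g are guesses rather than documented defaults; only C1 = C2 = C mirrors the command-line behavior, and npsvor != 4 is confirmed by predict_values to select the counting rule.

/*
 * train_example.c -- a hypothetical sketch of calling train() with the
 * extended parameter block. Toy data; rho/wl/svor/g values are guesses.
 */
#include <stdio.h>
#include "linear.h"

int main(void)
{
	/* two instances, two features, ordinal labels 1 and 2 */
	struct feature_node x1[] = { {1, 1.0}, {2, 0.0}, {-1, 0.0} };
	struct feature_node x2[] = { {1, 0.0}, {2, 1.0}, {-1, 0.0} };
	struct feature_node *x[] = { x1, x2 };
	double y[] = { 1, 2 };
	struct problem prob = { 2, 2, y, x, -1 }; /* l, n, y, x, bias */

	struct parameter param;
	param.solver_type = L2R_NPSVOR;
	param.eps = 0.1;          /* stopping tolerance */
	param.C = 1;
	param.C1 = param.C;       /* assumption: C1/C2 default to C */
	param.C2 = param.C;
	param.p = 0.1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.init_sol = NULL;
	param.rho = 1;            /* guess: ADMM penalty for SVOR */
	param.wl = 0;             /* guess: power of the |k-y| cost */
	param.svor = 0;           /* guess */
	param.g = 0;              /* guess: no C1/C2 grid search */
	param.npsvor = 2;         /* any value != 4 uses the counting rule in predict_values */

	const char *err = check_parameter(&prob, &param);
	if(err) { fprintf(stderr, "%s\n", err); return 1; }

	struct model *m = train(&prob, &param);
	printf("trained a %d-class model\n", get_nr_class(m));
	free_and_destroy_model(&m);
	return 0;
}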