首页 > 代码库 > java实现gbdt
Java 实现 GBDT
Data 类
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Scanner;

/**
 * Loads a comma-separated text file into memory as rows of raw string fields.
 *
 * <p>Each non-blank line is trimmed as a whole and split on {@code ","}; the individual
 * fields are stored exactly as split (they are NOT trimmed individually).
 *
 * <p>NOTE(review): the published listing was damaged between the {@code trainData}
 * initializer and the path literal. The no-argument constructor header and the
 * {@code dataPath} local are reconstructed from the surviving body and from
 * {@code main}, which calls {@code new Data()} - confirm against the original project.
 */
public class Data {
    /** Path literal that survives in the original listing; used by the no-argument constructor. */
    private static final String DEFAULT_DATA_PATH =
            "D://javajavajava//dbdt//src//script//data//adult.data.csv";

    /** One entry per loaded line; each entry holds that line's comma-separated fields. */
    private ArrayList<ArrayList<String>> trainData = new ArrayList<ArrayList<String>>();

    /** Loads the file at the default path. */
    public Data() {
        this(DEFAULT_DATA_PATH);
    }

    /**
     * Loads the file at {@code dataPath}.
     *
     * <p>If the file does not exist, the problem is reported on {@code System.err} and the
     * instance is left with no rows (the original also reported the error and carried on).
     *
     * @param dataPath location of the comma-separated file to read
     */
    public Data(String dataPath) {
        // try-with-resources closes the Scanner (and the underlying file) on every path.
        try (Scanner in = new Scanner(new File(dataPath), "UTF-8")) {
            while (in.hasNextLine()) {
                String line = in.nextLine().trim();
                if (line.isEmpty()) {
                    // A blank line would otherwise become a bogus one-field row [""].
                    continue;
                }
                String[] strs = line.split(",");
                ArrayList<String> tmp = new ArrayList<>(strs.length);
                for (String field : strs) {
                    tmp.add(field);
                }
                this.trainData.add(tmp);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Data file not found: " + dataPath + " (" + e.getMessage() + ")");
        }
    }

    /**
     * Returns the loaded rows.
     *
     * @return the internal list of rows (not a copy); empty when the file was missing
     */
    public ArrayList<ArrayList<String>> getTrainData() {
        return trainData;
    }

    public static void main(String[] args) {
        Data d = new Data();
    }
}
Tree 类
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.spi.TimeZoneNameProvider;

/**
 * A node of a binary regression tree used as the weak learner in the GBDT listing.
 *
 * <p>An internal node stores the column it splits on ({@code attributeSplit}) and the split
 * value ({@code attributeSplitType}). For numeric columns a row goes left when its value is
 * {@code <=} the split value; for other columns it goes left when its trimmed value equals
 * the split value. A leaf stores a single {@code leafValue}.
 *
 * <p>NOTE(review): the published listing was cut off inside the leaf branch of
 * {@link #constructTree}; anything the class declared after that point is lost and is not
 * reproduced here.
 */
public class Tree {
    /** Shared generator for sampling numeric split candidates. */
    private static final Random RANDOM = new Random();

    // Children are assigned by constructTree. The original initialised both with
    // "new Tree()", which recursed without end and overflowed the stack on any construction.
    private Tree leftTree;
    private Tree rightTree;
    private double loss = -1;
    private int attributeSplit = 0;
    private String attributeSplitType = "";
    boolean isLeaf;
    double leafValue;
    // Declared by the original listing; nothing in the surviving code reads or writes it.
    private ArrayList<Integer> leafNodeSet = new ArrayList<>();

    /**
     * Collects the distinct raw values found in one column.
     *
     * @param trainData rows of string fields
     * @param idx       column index
     * @return the distinct values of column {@code idx}, untrimmed, in no particular order
     */
    public ArrayList<String> getAttributeSet(ArrayList<ArrayList<String>> trainData, int idx) {
        HashSet<String> distinct = new HashSet<>();
        for (ArrayList<String> row : trainData) {
            distinct.add(row.get(idx));
        }
        return new ArrayList<>(distinct);
    }

    /**
     * Numeric "less than or equal" on two strings.
     *
     * <p>Parses as {@code double} so decimal values work as well as the integers the original
     * {@code Integer.parseInt} accepted; integer inputs compare exactly as before.
     *
     * @return {@code true} when {@code str1 <= str2} numerically
     * @throws NumberFormatException if either string is not a number
     */
    public boolean myCmpLess(String str1, String str2) {
        return Double.parseDouble(str1.trim()) <= Double.parseDouble(str2.trim());
    }

    /**
     * Square root of the summed squared deviations from the mean.
     *
     * @param values the target values of one candidate partition
     * @return {@code sqrt(sum((v - mean)^2))}; {@code 0} for an empty list
     */
    public double computeLoss(ArrayList<Double> values) {
        if (values.isEmpty()) {
            return 0;
        }
        double sum = 0;
        for (int i = 0; i < values.size(); i++) {
            sum += values.get(i);
        }
        double mean = sum / values.size();
        double squared = 0;
        for (int i = 0; i < values.size(); i++) {
            squared += Math.pow(values.get(i) - mean, 2);
        }
        return Math.sqrt(squared);
    }

    /**
     * Leaf value for the rows in {@code subIdx}:
     * {@code (K-1)/K * sum(t) / sum(t * (1 - t))} over the selected targets {@code t}.
     *
     * <p>The original computed {@code (K-1)/K} in integer arithmetic, which is 0 for every
     * {@code K > 1}. A zero denominator (including an empty {@code subIdx}) now yields
     * {@code 0} instead of NaN/Infinity.
     *
     * <p>NOTE(review): the textbook multi-class gradient-boosting update uses
     * {@code |t| * (1 - |t|)} in the denominator; the listing has no absolute value and that
     * is left unchanged here - confirm which is intended.
     *
     * @param K      number of classes
     * @param subIdx indices into {@code target} of the rows that reached this leaf
     * @param target per-row target values
     * @return the leaf value
     */
    public double getPredictValue(int K, ArrayList<Integer> subIdx, ArrayList<Double> target) {
        double numerator = 0;
        double denominator = 0;
        for (int i = 0; i < subIdx.size(); i++) {
            double t = target.get(subIdx.get(i));
            numerator += t;
            denominator += t * (1 - t);
        }
        if (denominator == 0) {
            return 0;
        }
        return (K - 1.0) / K * numerator / denominator;
    }

    /** Returns the stored leaf value of {@code root}. */
    public double getPredictValue(Tree root) {
        return root.leafValue;
    }

    /**
     * Routes one instance down the tree and returns the value of the leaf it reaches.
     *
     * @param root     node to start from
     * @param instance the instance's field values, indexed like the training columns
     * @param isDigit  per-column flag: {@code true} for numeric columns
     * @return the reached leaf's value
     */
    public double getPredictValue(Tree root, ArrayList<String> instance, Boolean isDigit[]) {
        if (root.isLeaf) {
            return root.leafValue;
        }
        String value = instance.get(root.attributeSplit).trim();
        boolean goesLeft = isDigit[root.attributeSplit]
                ? myCmpLess(value, root.attributeSplitType)
                : value.equals(root.attributeSplitType);
        return getPredictValue(goesLeft ? root.leftTree : root.rightTree, instance, isDigit);
    }

    /**
     * Recursively builds a tree over the rows in {@code subIdx}.
     *
     * <p>For every column and every candidate split value the rows are partitioned and the
     * candidate with the smallest {@code computeLoss(left) + computeLoss(right)} wins.
     * Candidates that leave one side empty are ignored. A leaf is produced when
     * {@code depth >= maxDepth[0]}, when fewer than two rows remain, or when no usable split
     * exists.
     *
     * <p>Fixes relative to the listing: partitions are rebuilt per candidate (they used to
     * accumulate), the best loss is tracked and minimised (the comparison was inverted and
     * the running loss never updated), the right subtree is stored in {@code rightTree}
     * (it overwrote {@code leftTree}), numeric candidate sampling no longer clears its own
     * list, and candidate values are trimmed so they can match the trimmed cell values.
     *
     * @param leafNodes   output: receives the row indices of every leaf created
     * @param leafValues  output: receives the value of every leaf created, same order
     * @param K           number of classes, forwarded to the leaf-value formula
     * @param splitPoints maximum number of candidate values tried per numeric column;
     *                    {@code <= 0} means try all distinct values
     * @param isDigit     per-column flag: {@code true} for numeric columns
     * @param subIdx      indices (into {@code trainData}/{@code target}) of this node's rows
     * @param trainData   all training rows
     * @param target      per-row target values
     * @param maxDepth    single-element array holding the depth limit
     * @param depth       depth of the node being built (root is 0)
     * @return the root of the constructed subtree
     */
    public Tree constructTree(ArrayList<ArrayList<Integer>> leafNodes, ArrayList<Double> leafValues, int K,
            int splitPoints, Boolean isDigit[], ArrayList<Integer> subIdx,
            ArrayList<ArrayList<String>> trainData, ArrayList<Double> target, int maxDepth[], int depth) {
        if (depth < maxDepth[0] && subIdx.size() > 1 && !trainData.isEmpty()) {
            int dim = trainData.get(0).size();
            double bestLoss = -1;
            ArrayList<Integer> leftNodes = new ArrayList<>();
            ArrayList<Integer> rightNodes = new ArrayList<>();
            int bestAttribute = 0;
            String bestValue = "";

            for (int i = 0; i < dim; i++) {
                boolean numeric = isDigit[i];
                for (String candidate : candidateSplitValues(trainData, i, numeric, splitPoints)) {
                    // Fresh partitions for every candidate.
                    ArrayList<Integer> leftTreeIdx = new ArrayList<>();
                    ArrayList<Integer> rightTreeIdx = new ArrayList<>();
                    for (int k = 0; k < subIdx.size(); k++) {
                        Integer row = subIdx.get(k);
                        String value = trainData.get(row).get(i);
                        boolean goesLeft = numeric
                                ? myCmpLess(value, candidate)
                                : value.trim().equals(candidate);
                        if (goesLeft) {
                            leftTreeIdx.add(row);
                        } else {
                            rightTreeIdx.add(row);
                        }
                    }
                    if (leftTreeIdx.isEmpty() || rightTreeIdx.isEmpty()) {
                        continue; // does not separate anything
                    }
                    double lossTmp = computeLoss(targetsOf(leftTreeIdx, target))
                            + computeLoss(targetsOf(rightTreeIdx, target));
                    if (bestLoss < 0 || lossTmp < bestLoss) {
                        bestLoss = lossTmp;
                        leftNodes = leftTreeIdx;
                        rightNodes = rightTreeIdx;
                        bestAttribute = i;
                        bestValue = candidate;
                    }
                }
            }

            if (bestLoss >= 0) {
                Tree node = new Tree();
                node.attributeSplit = bestAttribute;
                node.attributeSplitType = bestValue;
                node.loss = bestLoss;
                node.isLeaf = false;
                node.leftTree = constructTree(leafNodes, leafValues, K, splitPoints, isDigit, leftNodes,
                        trainData, target, maxDepth, depth + 1);
                node.rightTree = constructTree(leafNodes, leafValues, K, splitPoints, isDigit, rightNodes,
                        trainData, target, maxDepth, depth + 1);
                return node;
            }
        }

        Tree leaf = new Tree();
        leaf.isLeaf = true;
        leaf.leafValue = getPredictValue(K, subIdx, target);
        // NOTE(review): the listing ends right after the leafValue assignment. Recording the
        // leaf in the two output lists is an assumption based on the otherwise unused
        // leafNodes/leafValues parameters - confirm against the original project.
        if (leafNodes != null) {
            leafNodes.add(new ArrayList<>(subIdx));
        }
        if (leafValues != null) {
            leafValues.add(leaf.leafValue);
        }
        return leaf;
    }

    /** Distinct trimmed values of a column; numeric columns are randomly capped at splitPoints values. */
    private ArrayList<String> candidateSplitValues(ArrayList<ArrayList<String>> trainData, int column,
            boolean numeric, int splitPoints) {
        ArrayList<String> values = new ArrayList<>();
        HashSet<String> seen = new HashSet<>();
        for (String raw : getAttributeSet(trainData, column)) {
            String trimmed = raw.trim();
            if (seen.add(trimmed)) {
                values.add(trimmed);
            }
        }
        if (numeric && splitPoints > 0 && values.size() > splitPoints) {
            Collections.shuffle(values, RANDOM);
            return new ArrayList<>(values.subList(0, splitPoints));
        }
        return values;
    }

    /** Looks up the target value of every listed row. */
    private ArrayList<Double> targetsOf(ArrayList<Integer> rows, ArrayList<Double> target) {
        ArrayList<Double> picked = new ArrayList<>(rows.size());
        for (int k = 0; k < rows.size(); k++) {
            picked.add(target.get(rows.get(k)));
        }
        return picked;
    }
}
GBDT类
import java.io.File;
import java.io.FileNotFoundException;
import java.rmi.server.SkeletonNotFoundException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Scanner;
import java.util.Set;

/**
 * Prepares the training data for a two-class gradient-boosted tree model: drops rows with
 * missing values, encodes the label as -1/+1 and allocates the {@code F} and
 * {@code residual} matrices.
 *
 * <p>NOTE(review): the published listing lost everything between the {@code trainData}
 * initializer and the missing-value check inside the row loop. The declarations of
 * {@code labelTrainData}, {@code n} and {@code dim}, the way {@code datas} is filled, and the
 * start of the row loop are reconstructed from how the surviving code uses them - confirm
 * against the original project.
 */
public class GBDT {
    /**
     * NOTE(review): assumed to be the same file the Data class of this listing reads; the
     * original loading code is lost.
     */
    private static final String DEFAULT_DATA_PATH =
            "D://javajavajava//dbdt//src//script//data//adult.data.csv";

    private ArrayList<ArrayList<String>> datas = new ArrayList<ArrayList<String>>();
    private ArrayList<String> labelSets = new ArrayList<>();
    private ArrayList<ArrayList<Double>> F = new ArrayList<ArrayList<Double>>();
    private ArrayList<ArrayList<Double>> residual = new ArrayList<ArrayList<Double>>();
    private ArrayList<ArrayList<String>> trainData = new ArrayList<ArrayList<String>>();
    /** -1 for label {@code "<=50K"}, +1 for anything else; one entry per kept row. */
    private ArrayList<Integer> labelTrainData = new ArrayList<Integer>();
    /** Number of kept training rows. */
    private int n;
    /** Index of the label column (the last column of the first row). */
    private int dim;

    /** Loads the default data file and prepares the training structures. */
    public void getData() {
        getData(loadRows(DEFAULT_DATA_PATH));
    }

    /**
     * Prepares the training structures from already-loaded rows.
     *
     * <p>A row is kept only if none of its first {@code dim} fields nor its label field is
     * {@code "?"} after trimming. Rows too short to contain a label are skipped.
     *
     * @param rows raw rows; the last field of each row is the label
     */
    public void getData(ArrayList<ArrayList<String>> rows) {
        this.datas = rows;
        this.trainData.clear();
        this.labelTrainData.clear();
        this.labelSets.clear();
        this.F.clear();
        this.residual.clear();
        this.n = 0;
        this.dim = 0;
        if (this.datas.isEmpty() || this.datas.get(0).isEmpty()) {
            return;
        }
        this.dim = this.datas.get(0).size() - 1;

        for (int i = 0; i < datas.size(); i++) {
            ArrayList<String> row = datas.get(i);
            if (row.size() <= this.dim) {
                continue; // no label column present
            }
            // NOTE(review): assumed - tmp holds the row's raw feature fields.
            ArrayList<String> tmp = new ArrayList<>(this.dim);
            boolean flag = true;
            for (int j = 0; j < this.dim; j++) {
                if (row.get(j).trim().equals("?")) {
                    flag = false;
                    break;
                }
                tmp.add(row.get(j));
            }
            if (!flag) continue;
            if (row.get(this.dim).trim().equals("?")) continue;
            trainData.add(tmp);
            if (row.get(this.dim).trim().equals("<=50K")) labelTrainData.add(-1);
            else labelTrainData.add(1);
        }
        this.n = this.labelTrainData.size();
        for (int i = 0; i < this.datas.get(0).size() - 1; i++) labelSets.add(this.datas.get(0).get(i));

        // Initialise F to all zeros. F is n x 2 because this is a two-class problem;
        // change the column count here for more classes.
        for (int i = 0; i < this.n; i++) {
            // F and residual get their own row objects. The listing added the SAME list to
            // both, so updating one matrix silently changed the other.
            this.F.add(zeroRow(2));
            this.residual.add(zeroRow(2));
        }
    }

    /** Number of kept training rows. */
    public int getN() {
        return n;
    }

    /** Kept rows, feature fields only (internal list, not a copy). */
    public ArrayList<ArrayList<String>> getTrainData() {
        return trainData;
    }

    /** Encoded labels of the kept rows (internal list, not a copy). */
    public ArrayList<Integer> getLabelTrainData() {
        return labelTrainData;
    }

    /** The n x 2 score matrix (internal list, not a copy). */
    public ArrayList<ArrayList<Double>> getF() {
        return F;
    }

    /** The n x 2 residual matrix (internal list, not a copy). */
    public ArrayList<ArrayList<Double>> getResidual() {
        return residual;
    }

    /** Builds a mutable row of {@code width} zeros. */
    private static ArrayList<Double> zeroRow(int width) {
        ArrayList<Double> row = new ArrayList<Double>(width);
        for (int j = 0; j < width; j++) {
            row.add(0.0);
        }
        return row;
    }

    /** Reads a comma-separated file into rows; a missing file is reported and yields no rows. */
    private static ArrayList<ArrayList<String>> loadRows(String path) {
        ArrayList<ArrayList<String>> rows = new ArrayList<ArrayList<String>>();
        try (Scanner in = new Scanner(new File(path), "UTF-8")) {
            while (in.hasNextLine()) {
                String line = in.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                ArrayList<String> fields = new ArrayList<>();
                for (String field : line.split(",")) {
                    fields.add(field);
                }
                rows.add(fields);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Data file not found: " + path + " (" + e.getMessage() + ")");
        }
        return rows;
    }

    public static void main(String[] args) {
        GBDT dGbdt = new GBDT();
        dGbdt.getData();
        System.err.println(dGbdt.n);
    }
}
java实现gbdt
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。