首页 > 代码库 > hadoop中compare函数

hadoop中compare函数

在看 Hadoop 的二次排序(secondary sort)示例时,我把它改写了一下,给 key 加了第三个字段。本来以为排序是在下面这个 compareTo 方法中完成的:

 

   /**
    * Orders two keys by {@code first}, then {@code second}, then {@code third}, ascending.
    *
    * @param o the key to compare against
    * @return -1 if this key sorts before {@code o}, 1 if it sorts after, 0 if all three
    *     fields are equal
    */
   public int compareTo(IntPair o) {
     // Trace output that shows whether this object-level comparison is ever invoked.
     System.out.println("-----------compareTo");

     // Select the first pair of fields that differ, in priority order.
     int mine = first;
     int theirs = o.first;
     if (mine == theirs) {
       mine = second;
       theirs = o.second;
     }
     if (mine == theirs) {
       mine = third;
       theirs = o.third;
     }

     if (mine == theirs) {
       return 0;
     }
     return mine < theirs ? -1 : 1;
   }

本来以为排序是在这个方法里进行的,后来发现不是:把比较第 3 个字段的代码去掉后,输出结果仍然是有序的。

后来通过打印日志得知,排序实际上是在 Comparator 的 compare 函数中完成的,于是把它稍微改写了一下:

      public int compare(byte[] b1, int s1, int l1,                         byte[] b2, int s2, int l2) {        // 二进制数组读取       int intvalue =http://www.mamicode.com/ readInt(b1, s1);           System.out.println("s1 = " +  b1.length);       
    // 验证b1中存储的数据
int third = 0; for(int i =s1 + 9; i<= s1+ 12; i++){ third += (b1[i]&0xff) << (24-8*i); } System.out.println("third = " + third); return compareBytes(b1, s1, l1, b2, s2, l2); } }

序列化后的 key 由 3 个整型值组成(每个 4 字节),s1 为起始位置,l1 为长度 12,这样我们就可以从字节数组中读出各个字段的值。

其中 `return compareBytes(b1, s1, l1, b2, s2, l2);` 最终调用的是 `return FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2);`,其实现如下:

    public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2)      {        if ((buffer1 == buffer2) && (offset1 == offset2) && (length1 == length2))        {          return 0;        }        int end1 = offset1 + length1;        int end2 = offset2 + length2;        int i = offset1; for (int j = offset2; (i < end1) && (j < end2); ++j) {          int a = buffer1[i] & 0xFF;          int b = buffer2[j] & 0xFF;          if (a != b)            return (a - b);          ++i;        }        return (length1 - length2);      }    }  }

从代码中可以看出,比较是对整个字节数组逐字节进行的,三个值都参与了比较,所以出来的结果就是有序的。

结论:理论上 N 个字段这样出来的结果都是有序的,只是比较的长度有所变化。(注意:逐字节比较是按无符号字节进行的,因此该结论只对非负整数成立;如果字段可能为负数,需要像原始示例那样在写入时做 `- Integer.MIN_VALUE` 的偏移,否则负数会排在正数之后。)

测试又加了一个字段, 输出结果都是有序的。

测试代码

  public static class IntPair                       implements WritableComparable<IntPair> {    private int first = 0;    private int second = 0;    private int third = 0;    private int fourth = 0;        /**     * Set the left and right values.     */    public void set(int left, int right, int third, int fourth) {      first = left;      second = right;      this.third = third;      this.fourth = fourth;    }        public int getFirst() {      return first;    }        public int getSecond() {      return second;    }        public int getThird() {        return third;      }        public int getFourth() {        return fourth;      }        @Override	public String toString() {    	System.out.println("third = " + third);    	return first + "\t" + second + "\t" + third + "\t" + fourth;	}	/**     * Read the two integers.      * Encoded as: MIN_VALUE -> 0, 0 -> -MIN_VALUE, MAX_VALUE-> -1     */    @Override    public void readFields(DataInput in) throws IOException {      first = in.readInt();// + Integer.MIN_VALUE;      second = in.readInt();// + Integer.MIN_VALUE;      third = in.readInt();// + Integer.MIN_VALUE;      fourth = in.readInt();    }    @Override    public void write(DataOutput out) throws IOException {     /*      out.writeInt(first - Integer.MIN_VALUE);      out.writeInt(second - Integer.MIN_VALUE);      out.writeInt(third - Integer.MIN_VALUE);      */        out.writeInt(first );        out.writeInt(second );        out.writeInt(third );        out.writeInt(fourth);    }    @Override    public int hashCode() {      return first * 157 + second*10 + third;    }        @Override    public boolean equals(Object right) {      if (right instanceof IntPair) {        IntPair r = (IntPair) right;        return r.first == first && r.second == second && r.third == third && r.fourth == fourth;      } else {        return false;      }    }        /** A Comparator that compares serialized IntPair. 
*/     public static class Comparator extends WritableComparator {      public Comparator() {        super(IntPair.class);      }            // 排序比较器,数据全部存在byte数组      public int compare(byte[] b1, int s1, int l1,                         byte[] b2, int s2, int l2) {        // 二进制数组读取       int intvalue = http://www.mamicode.com/readInt(b1, s1);	"s1 = " +  b1.length);              int third = 0;       for(int i =s1 + 9; i<= s1+ 12; i++){    	   third += (b1[i]&0xff) << (24-8*i);    	}       	System.out.println("third = " + third);    	          return compareBytes(b1, s1, l1, b2, s2, l2);      }    }    static {   // register this comparator      WritableComparator.define(IntPair.class, new Comparator());    }    // 好像没用上        @Override    public int compareTo(IntPair o) {      System.out.println("-----------compareTo");      if (first != o.first) {        return first < o.first ? -1 : 1;      } else if (second != o.second) {        return second < o.second ? -1 : 1;      }// else if (third != o.third) {    	//        return third < o.third ? -1 : 1;}              return 0;      }  }