首页 > 代码库 > Hbase(四) 过滤器查询
Hbase(四) 过滤器查询
引言:过滤器的类型很多,但是可以分为两大类——比较过滤器,专用过滤器
过滤器的作用是在服务端判断数据是否满足条件,然后只将满足条件的数据返回给客户端;
一、hbase过滤器的分类
1、比较过滤器
行键过滤器 RowFilter
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("row-22")));
scan.setFilter(filter1);
列族过滤器 FamilyFilter
Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, new BinaryComparator(Bytes.toBytes("colfam3")));
scan.setFilter(filter1);
列过滤器 QualifierFilter
Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2")));
scan.setFilter(filter1);
值过滤器 ValueFilter
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(".4") );
scan.setFilter(filter1);
2、专用过滤器
单列值过滤器 SingleColumnValueFilter ----会返回满足条件的整行
SingleColumnValueFilter filter = new SingleColumnValueFilter(
Bytes.toBytes("colfam1"),
Bytes.toBytes("col-5"),
CompareFilter.CompareOp.NOT_EQUAL,
new SubstringComparator("val-5"));
filter.setFilterIfMissing(true); //如果不设置为 true,则那些不包含指定 column 的行也会返回
scan.setFilter(filter1);
单列值排除器 SingleColumnValueExcludeFilter -----返回排除了该列的结果 与上面的结果相反
前缀过滤器 PrefixFilter----针对行键
Filter filter = new PrefixFilter(Bytes.toBytes("row1"));
scan.setFilter(filter1);
列前缀过滤器 ColumnPrefixFilter
Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2"));
scan.setFilter(filter1);
分页过滤器 PageFilter
代码实现:
package com.ghgj.hbase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BinaryPrefixComparator; import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.ColumnPrefixFilter; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.FamilyFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter; import org.apache.hadoop.hbase.filter.PageFilter; import org.apache.hadoop.hbase.filter.PrefixFilter; import org.apache.hadoop.hbase.filter.QualifierFilter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.filter.SubstringComparator; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; public class HbasePageDemo { // 声明静态配置 static Configuration conf = null; private static final String ZK_CONNECT_STR = "hadoop01:2181,hadoop02:2181,hadoop03:2181,hadoop04:2181,hadoop05:2181"; static { conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", ZK_CONNECT_STR); } public static void main(String[] args) throws Exception { String tableName = "testfilter"; String cfName = "f1"; final byte[] POSTFIX = new byte[] { 0x00 }; HTable table = new HTable(conf, tableName); Filter filter = new PageFilter(3); byte[] lastRow = null; int totalRows = 0; while (true) { Scan scan = new Scan(); scan.setFilter(filter); if(lastRow != null){ //注意这里添加了 POSTFIX 操作,用来重置扫描边界 byte[] startRow = Bytes.add(lastRow,POSTFIX); scan.setStartRow(startRow); } ResultScanner scanner = table.getScanner(scan); int localRows = 0; Result result; while((result = scanner.next()) != null){ System.out.println(localRows++ + ":" + result); totalRows ++; lastRow = result.getRow(); } scanner.close(); if(localRows == 0) break; } System.out.println("total rows:" + totalRows); } / ** * 多种过滤条件的使用方法 * @throws Exception */ @Test public void testScan() throws Exception{ HTable table = new HTable(conf, "person".getBytes()); Scan scan = new Scan(Bytes.toBytes("person_zhang_000001"), Bytes.toBytes("person_zhang_000002")); //前缀过滤器----针对行键 Filter filter = new PrefixFilter(Bytes.toBytes("person")); //行过滤器 ---针对行键 ByteArrayComparable rowComparator = new BinaryComparator(Bytes.toBytes("person_zhang_000001")); RowFilter rf = new RowFilter(CompareOp.LESS_OR_EQUAL, rowComparator); rf = new RowFilter(CompareOp.EQUAL , new SubstringComparator("_2016-12-31_")); //单值过滤器 1 完整匹配字节数组 new SingleColumnValueFilter("base_info".getBytes(), "name".getBytes(), CompareOp.EQUAL, "zhangsan".getBytes()); //单值过滤器 2 匹配正则表达式 ByteArrayComparable comparator = new RegexStringComparator("zhang."); new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator); //单值过滤器 3 匹配是否包含子串,大小写不敏感 comparator = new SubstringComparator("wu"); new SingleColumnValueFilter("info".getBytes(), "NAME".getBytes(), CompareOp.EQUAL, comparator); //键值对元数据过滤-----family 过滤----字节数组完整匹配 FamilyFilter ff = new FamilyFilter(CompareOp.EQUAL , new BinaryComparator(Bytes.toBytes("base_info")) //表中不存 在 inf 列族,过滤结果为空 ); //键值对元数据过滤-----family 过滤----字节数组前缀匹配 ff = new FamilyFilter( CompareOp.EQUAL , new BinaryPrefixComparator(Bytes.toBytes("inf")) //表中存在以 inf 打头的列族 info,过滤结果为该列族所有行 ); //键值对元数据过滤-----qualifier 过滤----字节数组完整匹配 filter = new QualifierFilter( CompareOp.EQUAL , new BinaryComparator(Bytes.toBytes("na")) //表中不存在 na 列,过滤结果为空 ); filter = new QualifierFilter( CompareOp.EQUAL , new BinaryPrefixComparator(Bytes.toBytes("na")) //表中存在以 na 打头的列 name,过滤结果为所有行的该列数据 ); //基于列名(即 Qualifier)前缀过滤数据的 ColumnPrefixFilter filter = new ColumnPrefixFilter("na".getBytes()); //基于列名(即 Qualifier)多个前缀过滤数据的 MultipleColumnPrefixFilter byte[][] prefixes = new byte[][] {Bytes.toBytes("na"), Bytes.toBytes("me")}; filter = new MultipleColumnPrefixFilter(prefixes); //为查询设置过滤条件 scan.setFilter(filter); scan.addFamily(Bytes.toBytes("base_info")); //一行 // Result result = table.get(get); //多行的数据 ResultScanner scanner = table.getScanner(scan); for(Result r : scanner){ /** for(KeyValue kv : r.list()){ String family = new String(kv.getFamily()); System.out.println(family); String qualifier = new String(kv.getQualifier()); System.out.println(qualifier); System.out.println(new String(kv.getValue())); } */ //直接从 result 中取到某个特定的 value byte[] value = http://www.mamicode.com/r.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name")); System.out.println(new String(value)); } table.close(); } }
Hbase(四) 过滤器查询