首页 > 代码库 > Gora快速入门

Gora快速入门

参考官方文档:http://gora.apache.org/current/tutorial.html


一、环境准备
1、下载gora并解压

2、分别进入$GORA_HOME/gora-hbase/,$GORA_HOME/gora-core,$GORA_HOME/gora-compiler,$GORA_HOME/gora-compiler-cli执行
$ mvn clean install

3、启动hbase。
注意gora-0.5对应Hbase0.94

4、准备好日志文件,用于本项目的分析


二、建立项目

1、建立一个java project,并创建以下几个目录

技术分享
2、构建build path,增加hadoop library,hbase library以及avro, gora相关的类包。

3、将准备好的日志文件放到resource目录下

4、在conf目录下创建gora.properties,内容如下:

##gora.datastore.default is the default datastore implementation to use
##if it is not passed to the DataStoreFactory#createDataStore() method.
gora.datastore.default=org.apache.gora.hbase.store.HBaseStore
#gora.datastore.default=org.apache.gora.cassandra.store.CassandraStore
#gora.datastore.default=org.apache.gora.solr.store.SolrStore

#gora.datastore.default=org.apache.gora.avro.store.AvroStore
#gora.avrostore.input.path=hdfs://localhost:9000/gora.avrostore.test.input
#gora.avrostore.output.path=hdfs://localhost:9000/gora.avrostore.test.output
#gora.avrostore.codec.type=JSON || BINARY

##whether to create schema automatically if not exists.
gora.datastore.autocreateschema=true

##Cassandra properties for gora-cassandra module using Cassandra
#gora.cassandrastore.servers=localhost:9160

##JDBC properties for gora-sql module using HSQL
gora.sqlstore.jdbc.driver=org.hsqldb.jdbcDriver
##HSQL jdbc connection as persistent in-process database
gora.sqlstore.jdbc.url=jdbc:hsqldb:file:./hsql-data

##HSQL jdbc connection as network server
#gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/goratest

##JDBC properties for gora-sql module using MySQL
#gora.sqlstore.jdbc.driver=com.mysql.jdbc.Driver
#gora.sqlstore.jdbc.url=jdbc:mysql://localhost:3306/goratest
#gora.sqlstore.jdbc.user=root
#gora.sqlstore.jdbc.password=

gora.solrstore.solr.url=http://localhost:8983/solr
gora.solrstore.solr.commitwithin=0
gora.solrstore.solr.batchsize=100
# set which Solrj server impl you wish to use 
# cloud, concurrent, http, loadbalance
gora.solrstore.solr.solrjserver=http

5、在avro目录下创建pageview.json,内容如下:

{
  "type": "record",
  "name": "Pageview", "default":null,
  "namespace": "org.apache.gora.tutorial.log.generated",
  "fields" : [
    {"name": "url", "type": ["null","string"], "default":null},
    {"name": "timestamp", "type": "long", "default":0},
    {"name": "ip", "type": ["null","string"], "default":null},
    {"name": "httpMethod", "type": ["null","string"], "default":null},
    {"name": "httpStatusCode", "type": "int", "default":0},
    {"name": "responseSize", "type": "int", "default":0},
    {"name": "referrer", "type": ["null","string"], "default":null},
    {"name": "userAgent", "type": ["null","string"], "default":null}
  ]
}


6、根据pageview.json生成java类
$ pwd
/Users/liaoliuqing/99_Project/1_myCodes/MyGoraDemo

$ gora goracompiler avro/pageview.json src/
Compiling: /Users/liaoliuqing/99_Project/1_myCodes/MyGoraDemo/avro/pageview.json
Compiled into: /Users/liaoliuqing/99_Project/1_myCodes/MyGoraDemo/src
Compiler executed SUCCESSFULL.
此命令在src目录下生成一个类:
org.apache.gora.tutorial.log.generated.Pageview.java
内容如下:

/**
 * Autogenerated by Avro
 * 
 * DO NOT EDIT DIRECTLY
 */
package org.apache.gora.tutorial.log.generated;  
@SuppressWarnings("all")
public class Pageview extends org.apache.gora.persistency.impl.PersistentBase implements org.apache.avro.specific.SpecificRecord, org.apache.gora.persistency.Persistent {
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Pageview\",\"namespace\":\"org.apache.gora.tutorial.log.generated\",\"fields\":[{\"name\":\"url\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"timestamp\",\"type\":\"long\",\"default\":0},{\"name\":\"ip\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"httpMethod\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"httpStatusCode\",\"type\":\"int\",\"default\":0},{\"name\":\"responseSize\",\"type\":\"int\",\"default\":0},{\"name\":\"referrer\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"userAgent\",\"type\":[\"null\",\"string\"],\"default\":null}],\"default\":null}");

  /** Enum containing all data bean's fields. */
  public static enum Field {
    URL(0, "url"),
    TIMESTAMP(1, "timestamp"),
    IP(2, "ip"),
    HTTP_METHOD(3, "httpMethod"),
    HTTP_STATUS_CODE(4, "httpStatusCode"),
    RESPONSE_SIZE(5, "responseSize"),
    REFERRER(6, "referrer"),
    USER_AGENT(7, "userAgent"),
    ;
    /**
     * Field's index.
     */
    private int index;

    /**
     * Field's name.
     */
    private String name;

    /**
     * Field's constructor
     * @param index field's index.
     * @param name field's name.
     */
    Field(int index, String name) {this.index=index;this.name=name;}

    /**
     * Gets field's index.
     * @return int field's index.
     */
    public int getIndex() {return index;}

    /**
     * Gets field's name.
     * @return String field's name.
     */
    public String getName() {return name;}

    /**
     * Gets field's attributes to string.
     * @return String field's attributes to string.
     */
    public String toString() {return name;}
  };

  public static final String[] _ALL_FIELDS = {
  "url",
  "timestamp",
  "ip",
  "httpMethod",
  "httpStatusCode",
  "responseSize",
  "referrer",
  "userAgent",
  };

  /**
   * Gets the total field count.
   * @return int field count
   */
  public int getFieldsCount() {
    return Pageview._ALL_FIELDS.length;
  }

  private java.lang.CharSequence url;
  private long timestamp;
  private java.lang.CharSequence ip;
  private java.lang.CharSequence httpMethod;
  private int httpStatusCode;
  private int responseSize;
  private java.lang.CharSequence referrer;
  private java.lang.CharSequence userAgent;
  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
  // Used by DatumWriter.  Applications should not call. 
  public java.lang.Object get(int field$) {
    switch (field$) {
    case 0: return url;
    case 1: return timestamp;
    case 2: return ip;
    case 3: return httpMethod;
    case 4: return httpStatusCode;
    case 5: return responseSize;
    case 6: return referrer;
    case 7: return userAgent;
    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }
  
  // Used by DatumReader.  Applications should not call. 
  @SuppressWarnings(value="unchecked")

(以上代码在原文中被截断,完整内容请以goracompiler实际生成的Pageview.java为准。)

7、创建gora-hbase-mapping.xml,内容如下:
<?xml version="1.0" encoding="UTF-8"?>

<!--
  Gora Mapping file for HBase Backend
-->
<gora-otd>
  <table name="Pageview"> <!-- optional descriptors for tables -->
    <family name="common"/> <!-- This can also have params like compression, bloom filters -->
    <family name="http"/>
    <family name="misc"/>
  </table>

  <class name="org.apache.gora.tutorial.log.generated.Pageview" keyClass="java.lang.Long" table="AccessLog">
    <field name="url" family="common" qualifier="url"/>
    <field name="timestamp" family="common" qualifier="timestamp"/>
    <field name="ip" family="common" qualifier="ip" />
    <field name="httpMethod" family="http" qualifier="httpMethod"/>
    <field name="httpStatusCode" family="http" qualifier="httpStatusCode"/>
    <field name="responseSize" family="http" qualifier="responseSize"/>
    <field name="referrer" family="misc" qualifier="referrer"/>
    <field name="userAgent" family="misc" qualifier="userAgent"/>
  </class>

  <class name="org.apache.gora.tutorial.log.generated.MetricDatum" keyClass="java.lang.String" table="Metrics">
    <field name="metricDimension" family="common"  qualifier="metricDimension"/>
    <field name="timestamp" family="common" qualifier="ts"/>
    <field name="metric" family="common" qualifier="metric"/>
  </class>

</gora-otd>


三、代码编写及分析
1、编写以下代码

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gora.tutorial.log;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.StringTokenizer;

import org.apache.avro.util.Utf8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.gora.tutorial.log.generated.Pageview;
import org.apache.hadoop.conf.Configuration;

/**
 * LogManager is the tutorial class to illustrate the basic 
 * {@link DataStore} API usage. The LogManager class is used 
 * to parse the web server logs in combined log format, store the 
 * data in a Gora compatible data store, query and manipulate the stored data.  
 * 
 * <p>In the data model, keys are the line numbers in the log file, 
 * and the values are Pageview objects, generated from 
 * <code>gora-tutorial/src/main/avro/pageview.json</code>.
 * 
 * <p>See the tutorial.html file in docs or go to the 
 * <a href="http://gora.apache.org/docs/current/tutorial.html">

(以上代码在原文中被截断,完整的LogManager.java请参考Gora官方教程源码。)


2、在eclipse中run as java application,输出如下:
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/Users/liaoliuqing/99_Project/1_myCodes/MyGoraDemo/lib/slf4j-log4j12-1.6.6.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/liaoliuqing/99_Project/99_userLibrary/log4j_2.0/log4j-slf4j-impl-2.0-rc2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/liaoliuqing/1_BigData/1_Hadoop/0_Official/hadoop-1.2.1/lib/slf4j-log4j12-1.4.3.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
LogManager -parse <input_log_file>
           -get <lineNum>
           -query <lineNum>
           -query <startLineNum> <endLineNum>
           -delete <lineNum>
           -deleteByQuery <startLineNum> <endLineNum>


3、将项目打包并放至服务器中,此服务器要求运行着hbase与zookeeper。


4、将所需要的jar包放入classpath中,主要包括以下内容
[jediael@master bin]$ ls ../lib/
avro-1.7.6.jar      gora-core-0.5.jar   hbase-0.94.26.jar  slf4j-api-1.6.6.jar      zookeeper-3.4.5.jar
avro-ipc-1.5.3.jar  gora-hbase-0.5.jar  jdom-1.1.2.jar     slf4j-log4j12-1.6.6.jar
以及hadoop、hbase相关的jar包。

5、执行程序
在bin目录下执行以下命令
[jediael@master bin]$ java org.apache.gora.tutorial.log.LogManager -parse ../resource/access.log


6、查看结果
$ hbase shell

hbase(main):001:0> list
TABLE                                                                                                                                                                   
AccessLog                                                                                                                                                               
Jan2814_webpage                                                                                                                                                         
Jan2819_webpage                                                                                                                                                         
Jan2910_webpage                                                                                                                                                         
member                                                                                                                                                                  
5 row(s) in 1.2440 seconds

hbase(main):002:0> count 'AccessLog'
Current count: 1000, row: \x00\x00\x00\x00\x00\x00\x03\xE7                                                                                                              
Current count: 2000, row: \x00\x00\x00\x00\x00\x00\x07\xCF                                                                                                              
Current count: 3000, row: \x00\x00\x00\x00\x00\x00\x0B\xB7                                                                                                              
Current count: 4000, row: \x00\x00\x00\x00\x00\x00\x0F\x9F                                                                                                              
Current count: 5000, row: \x00\x00\x00\x00\x00\x00\x13\x87                                                                                                              
Current count: 6000, row: \x00\x00\x00\x00\x00\x00\x17o                                                                                                                 
Current count: 7000, row: \x00\x00\x00\x00\x00\x00\x1BW                                                                                                                 
Current count: 8000, row: \x00\x00\x00\x00\x00\x00\x1F?                                                                                                                 
Current count: 9000, row: \x00\x00\x00\x00\x00\x00#'
Current count: 10000, row: \x00\x00\x00\x00\x00\x00'\x0F
10000 row(s) in 1.8960 seconds



Gora快速入门