首页 > 代码库 > 设计模式之抽象工厂模式:Hadoop序列化框架

设计模式之抽象工厂模式:Hadoop序列化框架

参考:


http://www.w3cschool.cc/design-pattern/abstract-factory-pattern.html,《hadoop技术内幕》

介绍

意图:提供一个创建一系列相关或相互依赖对象的接口,而无需指定它们具体的类。

主要解决:主要解决接口选择的问题。

何时使用:系统的产品有多于一个的产品族,而系统只消费其中某一族的产品。

如何解决:在一个产品族里面,定义多个产品。

关键代码:在一个工厂里聚合多个同类产品。

应用实例:工作了,为了参加一些聚会,肯定有两套或多套衣服吧,比如说有商务装(成套,一系列具体产品)、时尚装(成套,一系列具体产品),甚至对于一个家庭来说,可能有商务女装、商务男装、时尚女装、时尚男装,这些也都是成套的,即一系列具体产品。假设一种情况(现实中是不存在的,要不然,没法进入共产主义了,但有利于说明抽象工厂模式),在您的家中,某一个衣柜(具体工厂)只能存放某一种这样的衣服(成套,一系列具体产品),每次拿这种成套的衣服时也自然要从这个衣柜中取出了。用 OO 的思想去理解,所有的衣柜(具体工厂)都是衣柜类的(抽象工厂)某一个,而每一件成套的衣服又包括具体的上衣(某一具体产品),裤子(某一具体产品),这些具体的上衣其实也都是上衣(抽象产品),具体的裤子也都是裤子(另一个抽象产品)。

优点:当一个产品族中的多个对象被设计成一起工作时,它能保证客户端始终只使用同一个产品族中的对象。

缺点:产品族扩展非常困难,要增加一个系列的某一产品,既要在抽象的 Creator 里加代码,又要在具体的里面加代码。

使用场景: 1、QQ 换皮肤,一整套一起换。 2、生成不同操作系统的程序。

注意事项:产品族难扩展,产品等级易扩展。

总结:抽象工厂就是生产工厂的工厂,可以从里边取一类产品。hadoop序列化框架中,Serializer和Deserializer实例相反,相互依赖,必须通过抽象工厂Serialization,才能获得对应的实现。

相关代码如下(hadoop-1.0.0的代码,省略注释):

package org.apache.hadoop.io.serializer;
public interface Serialization<T> {        //Serialization抽象工厂,为Serializer和Deserializer对象创建抽象类来获取工厂。
  
  boolean accept(Class<?> c);
  
  Serializer<T> getSerializer(Class<T> c); //得到Serializer,定义为接口

  Deserializer<T> getDeserializer(Class<T> c); //得到Deserializer,定义为接口
}
package org.apache.hadoop.io.serializer;

import java.io.IOException;
import java.io.OutputStream;

public interface Serializer<T> {             //为Serializer创建一个接口
  void open(OutputStream out) throws IOException;
  
  void serialize(T t) throws IOException;
  
  void close() throws IOException;
}
package org.apache.hadoop.io.serializer;

import java.io.IOException;
import java.io.InputStream;

public interface Deserializer<T> {            //为Deserializer创建一个接口
  void open(InputStream in) throws IOException;
  
  T deserialize(T t) throws IOException;
  
  void close() throws IOException;
}
package org.apache.hadoop.io.serializer;

import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Serializable;

public class JavaSerialization implements Serialization<Serializable> {   
  //创建扩展了Serialization的工厂类JavaSerialization ,基于给定的信息生成实体类的对象。
  static class JavaSerializationDeserializer<T extends Serializable>
    implements Deserializer<T> {

    private ObjectInputStream ois;

    public void open(InputStream in) throws IOException {
      ois = new ObjectInputStream(in) {
        @Override protected void readStreamHeader() {
          // no header
        }
      };
    }
    
    @SuppressWarnings("unchecked")
    public T deserialize(T object) throws IOException {
      try {
        // ignore passed-in object
        return (T) ois.readObject();
      } catch (ClassNotFoundException e) {
        throw new IOException(e.toString());
      }
    }

    public void close() throws IOException {
      ois.close();
    }

  }
  
  static class JavaSerializationSerializer
    implements Serializer<Serializable> {

    private ObjectOutputStream oos;

    public void open(OutputStream out) throws IOException {
      oos = new ObjectOutputStream(out) {
        @Override protected void writeStreamHeader() {
          // no header
        }
      };
    }

    public void serialize(Serializable object) throws IOException {
      oos.reset(); // clear (class) back-references
      oos.writeObject(object);
    }

    public void close() throws IOException {
      oos.close();
    }

  }

  public boolean accept(Class<?> c) {
    return Serializable.class.isAssignableFrom(c);
  }

  public Deserializer<Serializable> getDeserializer(Class<Serializable> c) {
    return new JavaSerializationDeserializer<Serializable>();
  }

  public Serializer<Serializable> getSerializer(Class<Serializable> c) {
    return new JavaSerializationSerializer();
  }

}
package org.apache.hadoop.io.serializer;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class WritableSerialization extends Configured implements Serialization<Writable> {
  //创建扩展了Serialization的工厂类WritableSerialization,基于给定的信息生成实体类的对象。
  
  static class WritableDeserializer extends Configured 
    implements Deserializer<Writable> {

    private Class<?> writableClass;
    private DataInputStream dataIn;
    
    public WritableDeserializer(Configuration conf, Class<?> c) {
      setConf(conf);
      this.writableClass = c;
    }
    
    public void open(InputStream in) {
      if (in instanceof DataInputStream) {
        dataIn = (DataInputStream) in;
      } else {
        dataIn = new DataInputStream(in);
      }
    }
    
    public Writable deserialize(Writable w) throws IOException {
      Writable writable;
      if (w == null) {
        writable 
          = (Writable) ReflectionUtils.newInstance(writableClass, getConf());
      } else {
        writable = w;
      }
      writable.readFields(dataIn);
      return writable;
    }

    public void close() throws IOException {
      dataIn.close();
    }
    
  }
  
  static class WritableSerializer implements Serializer<Writable> {

    private DataOutputStream dataOut;
    
    public void open(OutputStream out) {
      if (out instanceof DataOutputStream) {
        dataOut = (DataOutputStream) out;
      } else {
        dataOut = new DataOutputStream(out);
      }
    }

    public void serialize(Writable w) throws IOException {
      w.write(dataOut);
    }

    public void close() throws IOException {
      dataOut.close();
    }

  }

  public boolean accept(Class<?> c) {
    return Writable.class.isAssignableFrom(c);
  }

  public Deserializer<Writable> getDeserializer(Class<Writable> c) {
    return new WritableDeserializer(getConf(), c);
  }

  public Serializer<Writable> getSerializer(Class<Writable> c) {
    return new WritableSerializer();
  }

}
package org.apache.hadoop.io.serializer;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;

public class SerializationFactory extends Configured { 
  //创建一个工厂创造器/生成器类,通过传递类(Class<T> c)信息来获取工厂。
  private static final Log LOG =
    LogFactory.getLog(SerializationFactory.class.getName());

  private List<Serialization<?>> serializations = new ArrayList<Serialization<?>>();
  
  public SerializationFactory(Configuration conf) {
    super(conf);
    for (String serializerName : conf.getStrings("io.serializations", 
      new String[]{"org.apache.hadoop.io.serializer.WritableSerialization"})) {
      add(conf, serializerName);
    }
  }
  
  @SuppressWarnings("unchecked")
  private void add(Configuration conf, String serializationName) {
    try {
      
      Class<? extends Serialization> serializionClass =
        (Class<? extends Serialization>) conf.getClassByName(serializationName);
      serializations.add((Serialization)
          ReflectionUtils.newInstance(serializionClass, getConf()));
    } catch (ClassNotFoundException e) {
      LOG.warn("Serilization class not found: " +
          StringUtils.stringifyException(e));
    }
  }

  public <T> Serializer<T> getSerializer(Class<T> c) {
    return getSerialization(c).getSerializer(c);
  }

  public <T> Deserializer<T> getDeserializer(Class<T> c) {
    return getSerialization(c).getDeserializer(c);
  }

  @SuppressWarnings("unchecked")
  public <T> Serialization<T> getSerialization(Class<T> c) {
    for (Serialization serialization : serializations) {
      if (serialization.accept(c)) {
        return (Serialization<T>) serialization;
      }
    }
    return null;
  }
}







设计模式之抽象工厂模式:Hadoop序列化框架