首页 > 代码库 > Spark编程实现SQL查询的实例

Spark编程实现SQL查询的实例

 

1、Oracle中的SQL

select count(1)from a_V_PWYZL_CUSTACCT_PSMIS t   where not exists (select 1            from tb_show_multi_question q           WHERE q.dqmp_rule_code = ‘仅比对系统有‘             and q.dqmp_role_id = ‘105754659‘             and q.DQMP_target_id = t.dqmp_mrid)     AND NOT EXISTS (select /*+ index(s) */           1            from a_V_PWYZL_CUSTACCT_GIS s           where s.dqmp_cpk = t.dqmp_cpk)     and t.is_repeat = ‘0‘;

2、Hive/Shark版<style type="text/css">.csharpcode, .csharpcode pre{ font-size: small; color: black; font-family: consolas, "Courier New", courier, monospace; background-color: #ffffff; /*white-space: pre;*/}.csharpcode pre { margin: 0em; }.csharpcode .rem { color: #008000; }.csharpcode .kwrd { color: #0000ff; }.csharpcode .str { color: #006080; }.csharpcode .op { color: #0000c0; }.csharpcode .preproc { color: #cc6633; }.csharpcode .asp { background-color: #ffff00; }.csharpcode .html { color: #800000; }.csharpcode .attr { color: #ff0000; }.csharpcode .alt { background-color: #f4f4f4; width: 100%; margin: 0em;}.csharpcode .lnum { color: #606060; }</style>

select count(1) from  (  select t.*,q.dqmp_question_id(列索引q.0) as f1,s.gis_mrid(列索引s.0)as f2  from (select * from a_V_PWYZL_CUSTACCT_PSMIS t where t.is_repeat(列索引t.19) = ‘0‘) t       left outer join (select * from tb_show_multi_question q where q.dqmp_rule_code(列索引q.26)= ‘仅比对系统有‘ and q.dqmp_role_id(列索引q.31)= ‘105754659‘ ) q on q.DQMP_target_id(列索引q.13)= t.dqmp_mrid(列索引t.32)       left outer join a_V_PWYZL_CUSTACCT_GIS s on s.dqmp_cpk(列索引s.31)= t.dqmp_cpk(列索列t.31)  ) tvwhere tv.f1 is NULL and tv.f2 is NULL;
<style type="text/css">.csharpcode, .csharpcode pre{ font-size: small; color: black; font-family: consolas, "Courier New", courier, monospace; background-color: #ffffff; /*white-space: pre;*/}.csharpcode pre { margin: 0em; }.csharpcode .rem { color: #008000; }.csharpcode .kwrd { color: #0000ff; }.csharpcode .str { color: #006080; }.csharpcode .op { color: #0000c0; }.csharpcode .preproc { color: #cc6633; }.csharpcode .asp { background-color: #ffff00; }.csharpcode .html { color: #800000; }.csharpcode .attr { color: #ff0000; }.csharpcode .alt { background-color: #f4f4f4; width: 100%; margin: 0em;}.csharpcode .lnum { color: #606060; }</style>

 

3、Spark编程实现

 

package cn.com.gzkit.spark
 
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.PairRDDFunctions;
 
object test2 {
 
  def main(args: Array[String]) {
    
      var master="spark://kit-b1:7077";
      
      var sc=new SparkContext(master, "HdfsTest",System.getenv("SPARK_HOME"),SparkContext.jarOfClass(this.getClass));
      var file1 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_psmis.txt").map(_.split(‘|‘));
      var file2 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_gis.txt").map(_.split(‘|‘)).map(m=>(m(31),m(0)));
      var file3 = sc.textFile("hdfs://kit-b1/demodata/utf8_tb_show_multi_question.txt").map(_.split(‘|‘)).map(m=>(m(13),m(31),m(26),m(0)));
 
      file1.cache();
      file2.cache();
      file3.cache();
      
      //file1.count();
      //file2.count();
      //file3.count();
      
      
      System.out.println("-----------begin-----------------");
      
      
      var t=file1.filter(_(19)=="0");
      var t1=t.map(t=>(t(32),t));
      
      var q=file3.filter(_._2=="105754659").filter(_._3=="仅比对系统有");
      var q1=q.map(q=>(q._1,q));
      
      var tq=t1.leftOuterJoin(q1);
      
      
      System.out.println("-----------end-----------------");
     
      
      
      System.out.println("-----------begin2-----------------");
      var t2=tq.map(m=>(m._2._1(31),m._2));
      var s=file2;
      var ts=t2.leftOuterJoin(s);
      
      var rs=ts.filter(_._2._2==None).filter(_._2._1._2==None);
      
      System.out.println("sava file");
      rs.saveAsTextFile("hdfs://kit-b1/demodata/test/02");
      System.out.println("sava file end");
      var v2=rs.count();
      
      System.out.println("v2="+v2);
      System.out.println("-----------end2-----------------");
      
 
      sc.stop();
      
  }
  
}
<style type="text/css">.csharpcode, .csharpcode pre{ font-size: small; color: black; font-family: consolas, "Courier New", courier, monospace; background-color: #ffffff; /*white-space: pre;*/}.csharpcode pre { margin: 0em; }.csharpcode .rem { color: #008000; }.csharpcode .kwrd { color: #0000ff; }.csharpcode .str { color: #006080; }.csharpcode .op { color: #0000c0; }.csharpcode .preproc { color: #cc6633; }.csharpcode .asp { background-color: #ffff00; }.csharpcode .html { color: #800000; }.csharpcode .attr { color: #ff0000; }.csharpcode .alt { background-color: #f4f4f4; width: 100%; margin: 0em;}.csharpcode .lnum { color: #606060; }</style>