首页 > 代码库 > Hive权限之审计
Hive权限之审计
由于在生产环境中大量使用hive,而hive的权限又较弱,如果可以记录所有hive操作,在增强安全性的同时,还能够统计hive表的使用频率;同时如果能够记录hql的开始和结束时间,则能够找出系统中花费时间较多的job,针对性地进行优化,因此跟踪hive的使用轨迹,增强安全的同时也能方便问题定位。如何记录用户操作呢?Hive Hook为我们提供了方便的开放接口。
我们对hive的使用主要有两种使用场景,一是平时直接在命令行下执行的hql操作,此时执行hql的实体就是OS的登录用户;另外一种是从webapp获取的业务数据需求人员创建定时报表的hql脚本,此时执行hql的真正实体其实是报表创建者,系统仅仅是代理执行而已,此时记录用户的行为则需要重写hive.security.authenticator.manager。
Hive默认使用HadoopDefaultAuthenticator获取执行hql的用户,使用其返回的用户进行权限验证。为了使hive能够以代理的模式去执行,我们需要提供自己的authenticator,返回真正的hql执行者,以下配置可设置authenticator:
<property>
<name>hive.security.authenticator.manager</name>
<value>com.pplive.bip.hive.auth.Authenticator</value>
<description>bip user authenticator</description>
</property>
只有管理员可以开启代理模式,可以使用以下方式传递代理用户:
hive -d bip.user=xxx 或 hive --define bip.user=xxx
重写authenticator代码示例:
public classAuthenticator implements HiveAuthenticationProvider {
private finalstaticString BIP_USER="bip.user";
privateStringuserName;
privateStringbipUser;
privateList<String>groupNames;
privateConfigurationconf;
@Override
publicList<String> getGroupNames() {
returngroupNames;
}
@Override
publicStringgetUserName() {
this.bipUser = SessionState.get().getHiveVariables().get(BIP_USER);
if(this.bipUser !=null &&!this.bipUser.isEmpty()) {
if( AdminManager.isAdmin(this.userName)) {
returnthis.bipUser;
} else {
thrownewRuntimeException("bip.user is set while youare not admin");
}
} else{
returnthis.userName;
}
}
@Override
publicvoidsetConf(Configuration conf) {
this.conf = conf;
UserGroupInformation ugi = null;
try{
ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);
// UserGroupInformation.createProxyUser(user, realUser);
} catch(Exception e) {
thrownewRuntimeException(e);
}
if(ugi == null){
thrownewRuntimeException(
"Can not initialize PPLive Authenticator.");
}
this.userName = ugi.getUserName();
if(ugi.getGroupNames() !=null) {
this.groupNames = Arrays.asList(ugi.getGroupNames());
}
}
publicString getProxy() {
return this.userName;
}
Hive提供的SemanticHook可以方便我们记录hql语义分析前后的状态,Execute Hook可以记录hql翻译成job提交执行前后的状态, Driver Hook可以记录包括整个编译执行过程前后的状态。
SemanticHook记录语义分析后的行为:
public voidpostAnalyze(HiveSemanticAnalyzerHookContext context,
List<Task<?extendsSerializable>> rootTasks)
throws SemanticException {
Hivehive = null;
try {
hive= context.getHive();
}catch(HiveException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
Set<ReadEntity>inputs = context.getInputs();
Set<WriteEntity>outputs = context.getOutputs();
Set<String>readTables = newHashSet<String>();
for(ReadEntity input :inputs) {
Table table = input.getT();
if(table!=null) {
readTables.add(table.getTableName());
}
}
Set<String>writeTables = newHashSet<String>();
for(WriteEntity output :outputs) {
Table table = output.getT();
if(table!=null) {
writeTables.add(table.getTableName());
}
}
HiveAuthenticationProviderauthenticationProvider = SessionState.get().getAuthenticator();
if(authenticationProviderinstanceof Authenticator) {
Authenticatorauthenticator = (Authenticator)authenticationProvider; //ip
this.logger.info(String.format("phase=SA&executor=%s&proxy=%s&db=%s&cmd=%s&readTables=%s&writeTables=%s", authenticator.getUserName(),
authenticator.getProxy(), hive.getCurrentDatabase(),context.getCommand(),readTables.toString(),writeTables.toString()));
}
StringuserName = SessionState.get().getAuthenticator().getUserName();
logger.debug(String.format("%s execute %s, read tables:%s, writetables:%s", userName, context.getCommand(),readTables, writeTables));
}
Execute Hook记录job状态:
public classExecuteHook implements ExecuteWithHookContext {
Loggerlogger= Logger.getLogger(DriverRunHook.class);
privateHiveAuthenticationProviderauthenticationProvider =null;
private static final String JOB_START_TIME="PRE_EXEC_HOOK";
private static SimpleDateFormat dateFormat =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
@Override
public void run(HookContexthookContext) throwsException {
QueryPlanqueryPlan = hookContext.getQueryPlan();
StringqueryId = queryPlan.getQueryId();
StringqueryStr = queryPlan.getQueryStr();
if(authenticationProvider==null){
authenticationProvider= SessionState.get().getAuthenticator();
}
Stringresult = null;
switch(hookContext.getHookType()){
//hive.exec.pre.hooks
case PRE_EXEC_HOOK:
hookContext.getConf().setLong(JOB_START_TIME,System.currentTimeMillis());
break;
//hive.exec.post.hooks
case POST_EXEC_HOOK:
result= "Success";
break;
//hive.exec.failure.hooks
case ON_FAILURE_HOOK:
result= "Failure";
break;
default:
break;
}
if(hookContext.getHookType()!= HookContext.HookType.PRE_EXEC_HOOK&&authenticationProviderinstanceofAuthenticator) {
long jobEndTime = System.currentTimeMillis();
HiveConfconf = hookContext.getConf();
long jobStartTime =conf.getLong(JOB_START_TIME, jobEndTime);
long timeTaken =(jobEndTime-jobStartTime)/1000;
Authenticatorauthenticator = (Authenticator)authenticationProvider; //ip
this.logger.info(String.format("phase=EXEC&result=%s&executor=%s&proxy=%s&db=%s&queryId=%s&queryStr=%s&jobName=%s&jobStartTime=%s&jobEndTime=%s&timeTaken=%d", result,authenticator.getUserName(),authenticator.getProxy(),
Hive.get().getCurrentDatabase(),queryId, queryStr,conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME),dateFormat.format(new Date(jobStartTime)),
dateFormat.format(newDate(jobEndTime)),timeTaken));
}
}
}
DriverHook记录整个过程执行时间:
public classDriverRunHook implements HiveDriverRunHook{
Loggerlogger= Logger.getLogger(DriverRunHook.class);
privateHiveAuthenticationProviderauthenticationProvider =null;
private static SimpleDateFormat dateFormat =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private long startTime = 0;
@Override
public voidpreDriverRun(HiveDriverRunHookContext hookContext)
throws Exception {
if(authenticationProvider==null){
authenticationProvider= SessionState.get().getAuthenticator();
}
startTime = System.currentTimeMillis();
}
@Override
public voidpostDriverRun(HiveDriverRunHookContext hookContext)
throws Exception {
if(authenticationProviderinstanceofAuthenticator) {
long endTime = System.currentTimeMillis();
long timeTaken = (endTime-startTime)/1000;
Authenticatorauthenticator = (Authenticator)authenticationProvider; //ip
this.logger.info(String.format("phase=DriverRun&executor=%s&proxy=%s&db=%s&cmd=%s&startTime=%s&endTime=%s&timeTaken=%d", authenticator.getUserName(),authenticator.getProxy(),
Hive.get().getCurrentDatabase(),hookContext.getCommand(),dateFormat.format(newDate(startTime)),dateFormat.format(new Date(endTime)),timeTaken));
}
}
}