Commit 9cdc25ee by a760104

Initial commit of HiveHook written by Aetna.

TODO: add more parser functionality around ASTNode, specifically around partitions, drops, updates, and alters
parent 82209db0
<factorypath>
<factorypathentry kind="PLUGIN" id="com.ibm.jee.annotations.processor" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.etools.javaee.cdi.ext.ui" enabled="false" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.jaxrs.annotations.processor" enabled="false" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="org.eclipse.jst.ws.annotations.core" enabled="true" runInBatchMode="false"/>
</factorypath>
<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-governance</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
</parent>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-hivehook</artifactId>
<version>0.1-incubating-SNAPSHOT</version>
<name>metadata-hivehook</name>
<packaging>jar</packaging>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<repositories>
<repository>
<id>central</id>
<!-- This should be at top, it makes maven try the central repo first and then others and hence faster dep resolution -->
<name>Maven Repository</name>
<url>https://repo1.maven.org/maven2</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>apache-repo</id>
<name>Apache Repository</name>
<url>https://repository.apache.org/content/repositories/releases</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<fork>true</fork>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.3.2</version>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.13.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.13.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>
package com.aetna.hadoop.dgc.hive;
import java.io.Serializable;
import java.util.List;
import java.util.ArrayList;
/**
 * Serializable data-transfer bean describing the lineage of a single Hive query:
 * query-level metadata (id, user, timings, raw query text) plus the parsed
 * pieces of the statement (source tables, projected columns, where clauses,
 * created columns, group-by and order-by columns). Instances are serialized
 * to JSON (via Gson in the hook) and shipped to the DGC collector service.
 */
public class HiveLineageBean implements Serializable {

    /** Serialization version for this bean. */
    private static final long serialVersionUID = 1L;

    // Query-level metadata captured by the Hive hook. Fields are public so
    // that field-based serializers (e.g. Gson) pick them up directly.
    public String queryId;
    public String user;
    public String queryStartTime;
    public String queryEndTime;
    public String query;
    public String tableName;
    public String tableLocation;

    // Parsed lineage details (presumably populated by HiveLineageInfo's
    // AST walk -- TODO confirm against the parser).
    ArrayList<SourceTables> sourceTables;
    ArrayList<QueryColumns> queryColumns;
    ArrayList<WhereClause> whereClause;
    ArrayList<CreateColumns> createColumns;
    ArrayList<GroupBy> groupBy;
    // Order-by columns reuse the GroupBy shape (table alias + column name).
    ArrayList<GroupBy> orderBy;

    public String getQueryId() {
        return this.queryId;
    }

    public void setQueryId(String queryId) {
        this.queryId = queryId;
    }

    public String getTableName() {
        return this.tableName;
    }

    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    public String getTableLocation() {
        return this.tableLocation;
    }

    public void setTableLocation(String tableLocation) {
        this.tableLocation = tableLocation;
    }

    public String getUser() {
        return this.user;
    }

    public void setUser(String user) {
        this.user = user;
    }

    public String getQueryStartTime() {
        return this.queryStartTime;
    }

    public void setQueryStartTime(String queryStartTime) {
        this.queryStartTime = queryStartTime;
    }

    public String getQueryEndTime() {
        return this.queryEndTime;
    }

    public void setQueryEndTime(String queryEndTime) {
        this.queryEndTime = queryEndTime;
    }

    public String getQuery() {
        return this.query;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public ArrayList<SourceTables> getSourceTables() {
        return this.sourceTables;
    }

    public void setSourceTables(ArrayList<SourceTables> sourceTables) {
        this.sourceTables = sourceTables;
    }

    public ArrayList<QueryColumns> getQueryColumns() {
        return this.queryColumns;
    }

    public void setQueryColumns(ArrayList<QueryColumns> queryColumns) {
        this.queryColumns = queryColumns;
    }

    public ArrayList<WhereClause> getWhereClause() {
        return this.whereClause;
    }

    public void setWhereClause(ArrayList<WhereClause> whereClause) {
        this.whereClause = whereClause;
    }

    public ArrayList<GroupBy> getGroupBy() {
        return this.groupBy;
    }

    public void setGroupBy(ArrayList<GroupBy> groupBy) {
        this.groupBy = groupBy;
    }

    // FIX: the orderBy field previously had no accessors, unlike every
    // sibling field; added for API consistency (backward-compatible).
    public ArrayList<GroupBy> getOrderBy() {
        return this.orderBy;
    }

    public void setOrderBy(ArrayList<GroupBy> orderBy) {
        this.orderBy = orderBy;
    }

    public ArrayList<CreateColumns> getCreateColumns() {
        return this.createColumns;
    }

    public void setCreateColumns(ArrayList<CreateColumns> createColumns) {
        this.createColumns = createColumns;
    }

    // NOTE(review): the nested classes below are intentionally left as
    // non-static inner classes to preserve the existing construction
    // interface (outer.new Inner()); converting them to static nested
    // classes would break existing callers.

    /** A table referenced in the FROM clause of the query. */
    public class SourceTables {
        public String tableName;
        public String tableAlias;
        public String databaseName;

        public String getTableName() {
            return this.tableName;
        }

        public void setTableName(String tableName) {
            this.tableName = tableName;
        }

        public String getTableAlias() {
            return this.tableAlias;
        }

        public void setTableAlias(String tableAlias) {
            this.tableAlias = tableAlias;
        }

        public String getDatabaseName() {
            return this.databaseName;
        }

        public void setDatabaseName(String databaseName) {
            this.databaseName = databaseName;
        }
    }

    /** A column projected in the SELECT list, with optional alias/function. */
    public class QueryColumns {
        public String tbAliasOrName;
        public String columnName;
        public String columnAlias;
        public String columnFunction;

        public String getTbAliasOrName() {
            return this.tbAliasOrName;
        }

        public void setTbAliasOrName(String tbAliasOrName) {
            this.tbAliasOrName = tbAliasOrName;
        }

        public String getColumnName() {
            return this.columnName;
        }

        public void setColumnName(String columnName) {
            this.columnName = columnName;
        }

        public String getColumnAlias() {
            return this.columnAlias;
        }

        public void setColumnAlias(String columnAlias) {
            this.columnAlias = columnAlias;
        }

        public String getColumnFunction() {
            return this.columnFunction;
        }

        public void setColumnFunction(String columnFunction) {
            this.columnFunction = columnFunction;
        }
    }

    /** A column referenced in a GROUP BY (or ORDER BY) clause. */
    public class GroupBy {
        public String tbAliasOrName;
        public String columnName;

        public String getTbAliasOrName() {
            return this.tbAliasOrName;
        }

        public void setTbAliasOrName(String tbAliasOrName) {
            this.tbAliasOrName = tbAliasOrName;
        }

        public String getColumnName() {
            return this.columnName;
        }

        public void setColumnName(String columnName) {
            this.columnName = columnName;
        }
    }

    /** A single predicate from the WHERE clause (condition, operator, value). */
    public class WhereClause {
        public String tbAliasOrName;
        public String columnCondition;
        public String columnName;
        public String columnOperator;
        public String columnValue;

        public String getColumnCondition() {
            return this.columnCondition;
        }

        public void setColumnCondition(String columnCondition) {
            this.columnCondition = columnCondition;
        }

        public String getTbAliasOrName() {
            return this.tbAliasOrName;
        }

        public void setTbAliasOrName(String tbAliasOrName) {
            this.tbAliasOrName = tbAliasOrName;
        }

        public String getColumnName() {
            return this.columnName;
        }

        public void setColumnName(String columnName) {
            this.columnName = columnName;
        }

        public String getColumnOperator() {
            return this.columnOperator;
        }

        public void setColumnOperator(String columnOperator) {
            this.columnOperator = columnOperator;
        }

        public String getColumnValue() {
            return this.columnValue;
        }

        public void setColumnValue(String columnValue) {
            this.columnValue = columnValue;
        }
    }

    /** A column declared in a CREATE TABLE statement (name + type). */
    public class CreateColumns {
        public String columnName;
        public String columnType;

        public String getColumnName() {
            return this.columnName;
        }

        public void setColumnName(String columnName) {
            this.columnName = columnName;
        }

        public String getColumnType() {
            return this.columnType;
        }

        public void setColumnType(String columnType) {
            this.columnType = columnType;
        }
    }
}
package com.aetna.hadoop.dgc.hive;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.gson.Gson;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.util.StringUtils;
//import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
//import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
//import org.apache.hadoop.yarn.client.api.TimelineClient;
//import org.apache.hadoop.yarn.conf.YarnConfiguration;
/**
* DGC Hook sends query + plan info to DGCCollector Service. To enable (hadoop 2.4 and up) set
* hive.exec.pre.hooks/hive.exec.post.hooks/hive.exec.failure.hooks to include this class.
*/
public class Hook implements ExecuteWithHookContext {

  private static final Log LOG = LogFactory.getLog(Hook.class.getName());

  //private static TimelineClient timelineClient;
  private enum EntityTypes { HIVE_QUERY_ID };
  private enum EventTypes { QUERY_SUBMITTED, QUERY_COMPLETED };
  private enum OtherInfoTypes { QUERY, STATUS, TEZ, MAPRED };
  private enum PrimaryFilterTypes { user };

  private static final int WAIT_TIME = 3;

  /** Configuration key that overrides the collector endpoint. */
  private static final String POST_URI_CONF_KEY = "aetna.hive.hook";

  /** Fallback collector endpoint used when no override is configured. */
  private static final String DEFAULT_POST_URI =
      "http://167.69.111.50:20810/HiveHookCollector/HookServlet";

  private HiveLineageBean hlb;

  /**
   * Entry point invoked by Hive for pre-exec, post-exec and failure hooks.
   * On POST_EXEC_HOOK, parses the query text into a {@link HiveLineageBean}
   * and posts it to the DGC collector. Any exception is caught and logged so
   * that lineage reporting can never fail the user's query.
   *
   * @param hookContext Hive-supplied context carrying the query plan and conf
   */
  @Override
  public void run(HookContext hookContext) throws Exception {
    long currentTime = System.currentTimeMillis();
    try {
      QueryPlan plan = hookContext.getQueryPlan();
      if (plan == null) {
        return;
      }
      ExplainTask explain = new ExplainTask();
      explain.initialize(hookContext.getConf(), plan, null);
      String queryId = plan.getQueryId();
      String queryStartTime = plan.getQueryStartTime().toString();
      String user = hookContext.getUgi().getUserName();
      String query = plan.getQueryStr();

      switch (hookContext.getHookType()) {
        case PRE_EXEC_HOOK:
          // Informational only: log the query's input tables. Lineage itself
          // is extracted after the query completes successfully.
          Set<ReadEntity> inputs = hookContext.getInputs();
          for (ReadEntity input : inputs) {
            // FIX: was LOG.error for purely informational output.
            LOG.debug("DB:Table=" + input.toString());
          }
          break;
        case POST_EXEC_HOOK:
          currentTime = System.currentTimeMillis();
          // Parse the query into a lineage bean and ship it to the collector.
          HiveLineageInfo lep = new HiveLineageInfo();
          lep.getLineageInfo(query);
          hlb = lep.getHLBean();
          hlb.setQueryEndTime(Long.toString(currentTime));
          hlb.setQueryId(queryId);
          hlb.setQuery(query);
          hlb.setUser(user);
          hlb.setQueryStartTime(queryStartTime);
          fireAndForget(hookContext.getConf(), hlb, queryId);
          break;
        case ON_FAILURE_HOOK:
          // No lineage is reported for failed queries.
          break;
        default:
          break;
      }
    } catch (Exception e) {
      LOG.info("Failed to submit plan to DGC: " + StringUtils.stringifyException(e));
    }
  }

  /**
   * Serializes the lineage bean to JSON and POSTs it (url-encoded form data)
   * to the collector endpoint. The endpoint defaults to
   * {@link #DEFAULT_POST_URI} and may be overridden via the
   * {@code aetna.hive.hook} configuration key.
   *
   * @param conf     Hadoop configuration, consulted for the endpoint override
   * @param hookData lineage bean to serialize and send
   * @param queryId  id of the query the lineage belongs to
   * @throws Exception on any connection or I/O failure
   */
  public void fireAndForget(Configuration conf, HiveLineageBean hookData, String queryId)
      throws Exception {
    String postUri = DEFAULT_POST_URI;
    if (conf.getTrimmed(POST_URI_CONF_KEY) != null) {
      postUri = conf.getTrimmed(POST_URI_CONF_KEY);
    }

    Gson gson = new Gson();
    String gsonString = gson.toJson(hookData);
    LOG.debug("GSON String: " + gsonString);
    String postData = "hookdata=" + URLEncoder.encode(gsonString, "UTF-8")
        + "&queryid=" + URLEncoder.encode(queryId, "UTF-8");

    if (postUri.contains("https:")) {
      // SECURITY NOTE: this disables server certificate validation for the
      // collector endpoint. Preserved from the original implementation, but
      // it should be replaced with a properly configured trust store.
      installTrustAllSslSocketFactory();
    }

    URL url = new URL(postUri);
    LOG.debug("Post URI: " + postUri);
    // HttpsURLConnection extends HttpURLConnection, so a single code path
    // serves both http and https endpoints (previously duplicated verbatim).
    HttpURLConnection urlcon = (HttpURLConnection) url.openConnection();
    try {
      urlcon.setRequestMethod("POST");
      urlcon.setRequestProperty("X-Requested-By", "HiveHook");
      urlcon.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
      urlcon.setUseCaches(false);
      urlcon.setDoInput(true);
      urlcon.setDoOutput(true);
      LOG.debug("PostString: " + postData);
      writeRequestBody(urlcon, postData);
      String result = readResponseBody(urlcon);
      LOG.debug("Post Response: " + result);
    } finally {
      urlcon.disconnect();
    }
  }

  /** Writes the url-encoded form body, always closing the output stream. */
  private static void writeRequestBody(HttpURLConnection urlcon, String postData)
      throws IOException {
    DataOutputStream wr = new DataOutputStream(urlcon.getOutputStream());
    try {
      // FIX: explicit UTF-8 instead of the platform default charset, matching
      // the UTF-8 url-encoding applied above.
      wr.write(postData.getBytes("UTF-8"));
      wr.flush();
    } finally {
      // FIX: previously the stream leaked if write/flush threw.
      wr.close();
    }
  }

  /** Reads the full response body as a string, always closing the streams. */
  private static String readResponseBody(HttpURLConnection urlcon) throws IOException {
    InputStream is = urlcon.getInputStream();
    InputStreamReader isr = new InputStreamReader(is, "UTF-8");
    try {
      StringBuilder sb = new StringBuilder();
      char[] buffer = new char[1024];
      int numCharsRead;
      while ((numCharsRead = isr.read(buffer)) > 0) {
        sb.append(buffer, 0, numCharsRead);
      }
      return sb.toString();
    } finally {
      // FIX: previously the streams leaked if read threw.
      isr.close();
      is.close();
    }
  }

  /**
   * Installs a default SSL socket factory whose trust manager accepts every
   * certificate chain (behavior preserved from the original https branch).
   */
  private static void installTrustAllSslSocketFactory() {
    TrustManager[] trustAllCerts = new TrustManager[] {
      new X509TrustManager() {
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
          return null;
        }
        public void checkClientTrusted(
            java.security.cert.X509Certificate[] certs, String authType) {
        }
        public void checkServerTrusted(
            java.security.cert.X509Certificate[] certs, String authType) {
        }
      }
    };
    try {
      SSLContext sc = SSLContext.getInstance("SSL");
      sc.init(null, trustAllCerts, new java.security.SecureRandom());
      HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    } catch (Exception e) {
      // Best-effort: a failure here surfaces later as an SSL handshake error.
      LOG.warn("Unable to install trust-all SSL socket factory: "
          + StringUtils.stringifyException(e));
    }
  }
}
......@@ -315,6 +315,7 @@
<module>repository</module>
<module>webapp</module>
<module>docs</module>
<module>metadata-hivehook</module>
</modules>
<repositories>
......
<factorypath>
<factorypathentry kind="PLUGIN" id="com.ibm.jee.annotations.processor" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.etools.javaee.cdi.ext.ui" enabled="false" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.jaxrs.annotations.processor" enabled="false" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="org.eclipse.jst.ws.annotations.core" enabled="true" runInBatchMode="false"/>
</factorypath>
<factorypath>
<factorypathentry kind="PLUGIN" id="com.ibm.jaxrs.annotations.processor" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.jee.annotations.processor" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="com.ibm.etools.javaee.cdi.ext.ui" enabled="false" runInBatchMode="false"/>
<factorypathentry kind="PLUGIN" id="org.eclipse.jst.ws.annotations.core" enabled="true" runInBatchMode="false"/>
</factorypath>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment