Commit e5feaabf by Shwetha GS

hive hook for create database

parent 7f4227e2
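For context, a rough wiring sketch (assumed, not part of this diff): HiveHook implements ExecuteWithHookContext and HiveSemanticAnalyzerHook, so it would typically be registered through Hive's standard hook properties, while HiveMetaStoreBridge reads "hive.dgi.url" to locate the metadata service. The class name HookWiringSketch and the localhost URL below are illustrative only.

import org.apache.hadoop.hive.conf.HiveConf;

public class HookWiringSketch {
    public static void main(String[] args) {
        // Assumed wiring, not shown in this commit: enable the DGI hook via
        // Hive's standard hook properties and point it at the metadata service.
        HiveConf conf = new HiveConf();
        conf.set("hive.exec.post.hooks", "org.apache.hadoop.metadata.hive.hook.HiveHook");
        conf.set("hive.semantic.analyzer.hook", "org.apache.hadoop.metadata.hive.hook.HiveHook");
        // DGI_URL_PROPERTY in HiveMetaStoreBridge; URL is illustrative
        conf.set("hive.dgi.url", "http://localhost:21000");
        // Queries run with this configuration (e.g. CREATE DATABASE) are then
        // reported to DGI by the hook.
    }
}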
......@@ -44,15 +44,10 @@
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.calcite</groupId>
......@@ -69,6 +64,7 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.calcite</groupId>
......@@ -82,13 +78,10 @@
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<version>${hive.version}</version>
<scope>test</scope>
</dependency>
<dependency>
......@@ -104,31 +97,105 @@
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-repository</artifactId>
<artifactId>metadata-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-webapp</artifactId>
<version>${project.version}</version>
<type>war</type>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/dependency</outputDirectory>
<includeScope>runtime</includeScope>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
<overWriteIfNewer>true</overWriteIfNewer>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<version>${jetty.version}</version>
<configuration>
<excludes>
<exclude>resources/*-site.xml</exclude>
</excludes>
<skip>${skipTests}</skip>
<!-- only skip integration tests -->
<connectors>
<connector implementation="org.mortbay.jetty.nio.SelectChannelConnector">
<port>21000</port>
<maxIdleTime>60000</maxIdleTime>
</connector>
</connectors>
<webApp>../../webapp/target/metadata-webapp-${project.version}.war</webApp>
<contextPath>/</contextPath>
<useTestClasspath>true</useTestClasspath>
<systemProperties>
<systemProperty>
<name>metadata.log.dir</name>
<value>${project.build.directory}/logs</value>
</systemProperty>
</systemProperties>
<stopKey>metadata-stop</stopKey>
<stopPort>41001</stopPort>
</configuration>
<executions>
<execution>
<id>start-jetty</id>
<phase>pre-integration-test</phase>
<goals>
<goal>deploy-war</goal>
</goals>
<configuration>
<daemon>true</daemon>
</configuration>
</execution>
<execution>
<id>stop-jetty</id>
<phase>post-integration-test</phase>
<goals>
<goal>stop</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
......
......@@ -45,6 +45,21 @@ import java.util.List;
* and registers them in DGI.
*/
public class HiveMetaStoreBridge {
static class Pair<S, T> {
public S first;
public T second;
public Pair(S first, T second) {
this.first = first;
this.second = second;
}
public static <S, T> Pair<S, T> of(S first, T second) {
return new Pair<>(first, second);
}
}
public static final String DGI_URL_PROPERTY = "hive.dgi.url";
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
......@@ -53,16 +68,15 @@ public class HiveMetaStoreBridge {
/**
* Construct a HiveMetaStoreBridge.
* @param baseUrl metadata service url
* @param hiveConf hive configuration, used to connect to the metastore and to read the DGI url
*/
public HiveMetaStoreBridge(String baseUrl) throws Exception {
hiveMetaStoreClient = createHiveMetaStoreClient();
metadataServiceClient = new MetadataServiceClient(baseUrl);
public HiveMetaStoreBridge(HiveConf hiveConf) throws Exception {
hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
metadataServiceClient = new MetadataServiceClient(hiveConf.get(DGI_URL_PROPERTY));
}
private HiveMetaStoreClient createHiveMetaStoreClient() throws Exception {
HiveConf conf = new HiveConf();
return new HiveMetaStoreClient(conf);
public MetadataServiceClient getMetadataServiceClient() {
return metadataServiceClient;
}
public void importHiveMetadata() throws Exception {
......@@ -73,16 +87,18 @@ public class HiveMetaStoreBridge {
private void importDatabases() throws Exception {
List<String> databases = hiveMetaStoreClient.getAllDatabases();
for (String databaseName : databases) {
importDatabase(databaseName);
Referenceable dbReference = registerDatabase(databaseName);
importTables(databaseName, dbReference);
}
}
private void importDatabase(String databaseName) throws Exception {
public Referenceable registerDatabase(String databaseName) throws Exception {
LOG.info("Importing objects from databaseName : " + databaseName);
Database hiveDB = hiveMetaStoreClient.getDatabase(databaseName);
Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.name());
Referenceable dbRef = new Referenceable(HiveDataTypes.HIVE_DB.getName());
dbRef.set("name", hiveDB.getName());
dbRef.set("description", hiveDB.getDescription());
dbRef.set("locationUri", hiveDB.getLocationUri());
......@@ -90,12 +106,10 @@ public class HiveMetaStoreBridge {
dbRef.set("ownerName", hiveDB.getOwnerName());
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
Referenceable databaseReferenceable = createInstance(dbRef);
importTables(databaseName, databaseReferenceable);
return createInstance(dbRef);
}
private Referenceable createInstance(Referenceable referenceable) throws Exception {
public Referenceable createInstance(Referenceable referenceable) throws Exception {
String typeName = referenceable.getTypeName();
LOG.debug("creating instance of type " + typeName);
......@@ -108,22 +122,26 @@ public class HiveMetaStoreBridge {
return new Referenceable(guid, referenceable.getTypeName(), referenceable.getValuesMap());
}
private void importTables(String databaseName,
Referenceable databaseReferenceable) throws Exception {
private void importTables(String databaseName, Referenceable databaseReferenceable) throws Exception {
List<String> hiveTables = hiveMetaStoreClient.getAllTables(databaseName);
for (String tableName : hiveTables) {
importTable(databaseName, tableName, databaseReferenceable);
Pair<Referenceable, Referenceable> tableReferenceable = registerTable(databaseReferenceable, databaseName, tableName);
// Import Partitions
importPartitions(databaseName, tableName, databaseReferenceable, tableReferenceable.first, tableReferenceable.second);
// Import Indexes
importIndexes(databaseName, tableName, databaseReferenceable, tableReferenceable.first);
}
}
private void importTable(String db, String tableName,
Referenceable databaseReferenceable) throws Exception {
LOG.info("Importing objects from " + db + "." + tableName);
public Pair<Referenceable, Referenceable> registerTable(Referenceable dbReference, String dbName, String tableName) throws Exception {
LOG.info("Importing objects from " + dbName + "." + tableName);
Table hiveTable = hiveMetaStoreClient.getTable(db, tableName);
Table hiveTable = hiveMetaStoreClient.getTable(dbName, tableName);
Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.name());
Referenceable tableRef = new Referenceable(HiveDataTypes.HIVE_TABLE.getName());
tableRef.set("tableName", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
tableRef.set("createTime", hiveTable.getCreateTime());
......@@ -131,7 +149,7 @@ public class HiveMetaStoreBridge {
tableRef.set("retention", hiveTable.getRetention());
// add reference to the database
tableRef.set("dbName", databaseReferenceable);
tableRef.set("dbName", dbReference);
// add reference to the StorageDescriptor
StorageDescriptor storageDesc = hiveTable.getSd();
......@@ -143,7 +161,7 @@ public class HiveMetaStoreBridge {
Referenceable colRef;
if (hiveTable.getPartitionKeysSize() > 0) {
for (FieldSchema fs : hiveTable.getPartitionKeys()) {
colRef = new Referenceable(HiveDataTypes.HIVE_COLUMN.name());
colRef = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
......@@ -168,12 +186,7 @@ public class HiveMetaStoreBridge {
tableRef.set("temporary", hiveTable.isTemporary());
Referenceable tableReferenceable = createInstance(tableRef);
// Import Partitions
importPartitions(db, tableName, databaseReferenceable, tableReferenceable, sdReferenceable);
// Import Indexes
importIndexes(db, tableName, databaseReferenceable, tableRef);
return Pair.of(tableReferenceable, sdReferenceable);
}
private void importPartitions(String db, String table,
......@@ -194,7 +207,7 @@ public class HiveMetaStoreBridge {
Referenceable dbReferenceable,
Referenceable tableReferenceable,
Referenceable sdReferenceable) throws Exception {
Referenceable partRef = new Referenceable(HiveDataTypes.HIVE_PARTITION.name());
Referenceable partRef = new Referenceable(HiveDataTypes.HIVE_PARTITION.getName());
partRef.set("values", hivePart.getValues());
partRef.set("dbName", dbReferenceable);
......@@ -227,7 +240,7 @@ public class HiveMetaStoreBridge {
private void importIndex(Index index,
Referenceable dbReferenceable,
Referenceable tableReferenceable) throws Exception {
Referenceable indexRef = new Referenceable(HiveDataTypes.HIVE_INDEX.name());
Referenceable indexRef = new Referenceable(HiveDataTypes.HIVE_INDEX.getName());
indexRef.set("indexName", index.getIndexName());
indexRef.set("indexHandlerClass", index.getIndexHandlerClass());
......@@ -252,13 +265,13 @@ public class HiveMetaStoreBridge {
private Referenceable fillStorageDescStruct(StorageDescriptor storageDesc) throws Exception {
LOG.debug("Filling storage descriptor information for " + storageDesc);
Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.name());
Referenceable sdReferenceable = new Referenceable(HiveDataTypes.HIVE_STORAGEDESC.getName());
SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
LOG.debug("serdeInfo = " + serdeInfo);
// SkewedInfo skewedInfo = storageDesc.getSkewedInfo();
String serdeInfoName = HiveDataTypes.HIVE_SERDE.name();
String serdeInfoName = HiveDataTypes.HIVE_SERDE.getName();
Struct serdeInfoStruct = new Struct(serdeInfoName);
serdeInfoStruct.set("name", serdeInfo.getName());
......@@ -288,7 +301,7 @@ public class HiveMetaStoreBridge {
Referenceable colReferenceable;
for (FieldSchema fs : storageDesc.getCols()) {
LOG.debug("Processing field " + fs);
colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.name());
colReferenceable = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
colReferenceable.set("name", fs.getName());
colReferenceable.set("type", fs.getType());
colReferenceable.set("comment", fs.getComment());
......@@ -299,7 +312,7 @@ public class HiveMetaStoreBridge {
List<Struct> sortColsStruct = new ArrayList<>();
for (Order sortcol : storageDesc.getSortCols()) {
String hiveOrderName = HiveDataTypes.HIVE_ORDER.name();
String hiveOrderName = HiveDataTypes.HIVE_ORDER.getName();
Struct colStruct = new Struct(hiveOrderName);
colStruct.set("col", sortcol.getCol());
colStruct.set("order", sortcol.getOrder());
......@@ -325,18 +338,8 @@ public class HiveMetaStoreBridge {
return createInstance(sdReferenceable);
}
static String getServerUrl(String[] args) {
String baseUrl = "http://localhost:21000";
if (args.length > 0) {
baseUrl = args[0];
}
return baseUrl;
}
public static void main(String[] argv) throws Exception {
String baseUrl = getServerUrl(argv);
HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(baseUrl);
HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(new HiveConf());
hiveMetaStoreBridge.importHiveMetadata();
}
}
......@@ -15,6 +15,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hive.hook;
......@@ -23,16 +40,15 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveParser;
......@@ -40,18 +56,21 @@ import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.metadata.hivetypes.HiveTypeSystem;
import org.apache.hadoop.util.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.MetadataServiceException;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
......@@ -67,19 +86,9 @@ import java.util.concurrent.TimeUnit;
public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHook {
private static final Log LOG = LogFactory.getLog(HiveHook.class.getName());
private static final String dgcDumpDir = "/tmp/dgcfiles";
// wait time determines how long we wait before we exit the jvm on
// shutdown. Pending requests after that will not be sent.
private static final int WAIT_TIME = 3;
private static final String dbHost = "10.11.4.125";
private static final String url = "jdbc:postgres://" + dbHost + "/dgctest";
private static final String user = "postgres";
private static final String password = "postgres";
private static final String insertQuery =
"insert into query_info(query_id, query_text, query_plan, start_time, user_name, "
+ "query_graph) values (?, ?, ?, ?, ?, ?";
private static final String updateQuery =
"update query_info set end_time = ? where query_id = ?";
private static ExecutorService executor;
static {
......@@ -115,24 +124,9 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
} catch (IllegalStateException is) {
LOG.info("Attempting to send msg while shutdown in progress.");
}
LOG.info("Created DgiHook");
}
Connection connection = null;
PreparedStatement insertStatement = null;
PreparedStatement updateStatement = null;
private HiveTypeSystem hiveTypeSystem;
public HiveHook() {
try {
File dgcDumpFile = new File(dgcDumpDir);
dgcDumpFile.mkdirs();
connection = DriverManager.getConnection(url, user, password);
insertStatement = connection.prepareStatement(insertQuery);
updateStatement = connection.prepareStatement(updateQuery);
} catch (Exception e) {
LOG.error("Exception initializing HiveHook " + e);
}
LOG.info("Created DGI Hook");
executor.shutdown();
}
@Override
......@@ -142,162 +136,176 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
return;
}
final long currentTime = System.currentTimeMillis();
// clone to avoid concurrent access
final HiveConf conf = new HiveConf(hookContext.getConf());
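// "debug" comes from the cloned job conf: when true the event is handled
// synchronously in fireAndForget (presumably for tests), otherwise it is
// handed off to the executor so the query is not blocked.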
boolean debug = conf.get("debug", "false").equals("true");
executor.submit(
if (debug) {
fireAndForget(hookContext, conf);
} else {
executor.submit(
new Runnable() {
@Override
public void run() {
try {
QueryPlan plan = hookContext.getQueryPlan();
if (plan == null) {
LOG.info("no plan in callback.");
return;
}
String queryId = plan.getQueryId();
long queryStartTime = plan.getQueryStartTime();
String user = hookContext.getUgi().getUserName();
String operationType = hookContext.getOperationName();
Set<WriteEntity> outputs = plan.getOutputs();
Set<ReadEntity> inputs = plan.getInputs();
switch (hookContext.getHookType()) {
case PRE_EXEC_HOOK: // command about to execute
ExplainTask explain = new ExplainTask();
explain.initialize(conf, plan, null);
String query = plan.getQueryStr();
List<Task<?>> rootTasks = plan.getRootTasks();
//We need to somehow get the sem associated with the plan and
// use it here.
//MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
//sem.setInputs(plan.getInputs());
//ExplainWork ew = new ExplainWork(null, null, rootTasks,
// plan.getFetchTask(), null, sem,
// false, true, false, false, false);
//JSONObject explainPlan =
// explain.getJSONLogicalPlan(null, ew);
String graph = "";
if (plan.getQuery().getStageGraph() != null) {
graph = plan.getQuery().getStageGraph().toString();
}
JSONObject explainPlan =
explain.getJSONPlan(null, null, rootTasks,
plan.getFetchTask(), true, false, false);
fireAndForget(conf,
createPreHookEvent(queryId, query,
explainPlan, queryStartTime,
user, inputs, outputs, graph));
break;
case POST_EXEC_HOOK: // command succeeded successfully
fireAndForget(conf,
createPostHookEvent(queryId, currentTime, user,
true, inputs, outputs));
break;
case ON_FAILURE_HOOK: // command failed
fireAndForget(conf,
createPostHookEvent(queryId, currentTime, user,
false, inputs, outputs));
break;
default:
//ignore
LOG.info("unknown hook type");
break;
}
} catch (Exception e) {
LOG.info("Failed to submit plan: " + StringUtils.stringifyException(e));
fireAndForget(hookContext, conf);
} catch (Throwable e) {
LOG.info("DGI hook failed", e);
}
}
}
);
);
}
}
private void appendEntities(JSONObject obj, String key,
Set<? extends Entity> entities)
throws JSONException {
private void fireAndForget(HookContext hookContext, HiveConf conf) throws Exception {
LOG.info("Entered DGI hook for query hook " + hookContext.getHookType());
if (hookContext.getHookType() != HookContext.HookType.POST_EXEC_HOOK) {
LOG.debug("No-op for query hook " + hookContext.getHookType());
}
for (Entity e : entities) {
if (e != null) {
JSONObject entityObj = new JSONObject();
entityObj.put("type", e.getType().toString());
entityObj.put("name", e.toString());
HiveMetaStoreBridge dgiBridge = new HiveMetaStoreBridge(conf);
HiveOperation operation = HiveOperation.valueOf(hookContext.getOperationName());
Set<ReadEntity> inputs = hookContext.getInputs();
Set<WriteEntity> outputs = hookContext.getOutputs();
String user = hookContext.getUserName();
String queryId = null;
String queryStr = null;
long queryStartTime = 0;
QueryPlan plan = hookContext.getQueryPlan();
if (plan != null) {
queryId = plan.getQueryId();
queryStr = plan.getQueryString();
queryStartTime = plan.getQueryStartTime();
}
obj.append(key, entityObj);
System.out.println(String.format("%s - %s", queryStr, hookContext.getOperationName()));
StringBuffer stringBuffer = new StringBuffer("Inputs - ");
for (ReadEntity entity : inputs) {
stringBuffer = stringBuffer.append(" ").append(entity.getType());
if (entity.getType() == Entity.Type.TABLE) {
stringBuffer = stringBuffer.append(" ").append(entity.getTable().getTableName());
} else if (entity.getType() == Entity.Type.DATABASE) {
stringBuffer = stringBuffer.append(" ").append(entity.getDatabase().getName());
}
}
stringBuffer = stringBuffer.append(" Outputs - ");
for (WriteEntity entity : outputs) {
stringBuffer = stringBuffer.append(" ").append(entity.getType());
if (entity.getType() == Entity.Type.TABLE) {
stringBuffer = stringBuffer.append(" ").append(entity.getTable().getTableName());
} else if (entity.getType() == Entity.Type.DATABASE) {
stringBuffer = stringBuffer.append(" ").append(entity.getDatabase().getName());
}
}
System.out.println(stringBuffer.toString());
switch (operation) {
case CREATEDATABASE:
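// e.g. "CREATE DATABASE sales_db" splits into {"CREATE", "DATABASE", "sales_db"},
// so token 2 is the database name ("sales_db" is only an illustrative name; this
// assumes the plain CREATE DATABASE <name> form of the statement).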
String dbName = queryStr.split(" ")[2].trim();
dgiBridge.registerDatabase(dbName);
break;
case CREATETABLE:
for (WriteEntity entity : outputs) {
if (entity.getType() == Entity.Type.TABLE) {
Table table = entity.getTable();
//TODO table.getDbName().toLowerCase() is required as hive stores in lowercase, but table.getDbName() is not lowercase
Referenceable dbReferenceable = getDatabaseReference(dgiBridge, table.getDbName().toLowerCase());
dgiBridge.registerTable(dbReferenceable, table.getDbName(), table.getTableName());
}
}
break;
case CREATETABLE_AS_SELECT:
Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
processReferenceable.set("processName", operation.getOperationName());
processReferenceable.set("startTime", queryStartTime);
processReferenceable.set("endTime", 0);
processReferenceable.set("userName", user);
List<Referenceable> source = new ArrayList<>();
for (ReadEntity readEntity : inputs) {
if (readEntity.getTyp() == Entity.Type.TABLE) {
source.add(getTableReference(dgiBridge, readEntity.getTable().getTableName()));
}
}
processReferenceable.set("sourceTableNames", source);
List<Referenceable> target = new ArrayList<>();
for (WriteEntity writeEntity : outputs) {
if (writeEntity.getTyp() == Entity.Type.TABLE) {
target.add(getTableReference(dgiBridge, writeEntity.getTable().getTableName()));
}
}
processReferenceable.set("targetTableNames", target);
processReferenceable.set("queryText", queryStr);
processReferenceable.set("queryId", queryId);
//TODO set
processReferenceable.set("queryPlan", "");
processReferenceable.set("queryGraph", "");
dgiBridge.createInstance(processReferenceable);
break;
default:
}
}
private JSONObject createPreHookEvent(String queryId, String query,
JSONObject explainPlan, long startTime, String user,
Set<ReadEntity> inputs, Set<WriteEntity> outputs,
String graph)
throws JSONException {
JSONObject queryObj = new JSONObject();
queryObj.put("queryText", query);
queryObj.put("queryPlan", explainPlan);
queryObj.put("queryId", queryId);
queryObj.put("startTime", startTime);
queryObj.put("user", user);
queryObj.put("graph", graph);
appendEntities(queryObj, "inputs", inputs);
appendEntities(queryObj, "output", outputs);
LOG.info("Received pre-hook notification for :" + queryId);
if (LOG.isDebugEnabled()) {
LOG.debug("DGI Info: " + queryObj.toString(2));
/**
* Gets reference for the database. Creates new instance if it doesn't exist
* @param dgiBridge
* @param dbName database name
* @return Reference for database
* @throws Exception
*/
private Referenceable getDatabaseReference(HiveMetaStoreBridge dgiBridge, String dbName) throws Exception {
String typeName = HiveDataTypes.HIVE_DB.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.search(typeName, "name", dbName);
JSONArray results = (JSONArray) result.get("results");
if (results.length() == 0) {
//Create new instance
return dgiBridge.registerDatabase(dbName);
} else {
String guid = (String) ((JSONObject) results.get(0)).get("guid");
return new Referenceable(guid, typeName, null);
}
return queryObj;
}
private JSONObject createPostHookEvent(String queryId, long stopTime,
String user, boolean success, Set<ReadEntity> inputs,
Set<WriteEntity> outputs)
throws JSONException {
JSONObject completionObj = new JSONObject();
completionObj.put("queryId", queryId);
completionObj.put("stopTime", stopTime);
completionObj.put("user", user);
completionObj.put("result", success);
appendEntities(completionObj, "inputs", inputs);
appendEntities(completionObj, "output", outputs);
LOG.info("Received post-hook notification for :" + queryId);
if (LOG.isDebugEnabled()) {
LOG.debug("DGI Info: " + completionObj.toString(2));
/**
* Gets reference for the table. Throws an exception if the table instance doesn't exist
* @param dgiBridge
* @param tableName table name
* @return table reference
* @throws MetadataServiceException
* @throws JSONException
*/
private Referenceable getTableReference(HiveMetaStoreBridge dgiBridge, String tableName) throws MetadataServiceException, JSONException {
String typeName = HiveDataTypes.HIVE_TABLE.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.search(typeName, "tableName", tableName);
JSONArray results = (JSONArray) new JSONObject((String) result.get("results")).get("results");
if (results.length() == 0) {
throw new IllegalArgumentException("There is no entity for " + typeName + " where tableName=" + tableName);
}
return completionObj;
//There should be just one instance with the given name
String guid = (String) new JSONObject((String) results.get(0)).get("guid");
return new Referenceable(guid, typeName, null);
}
private synchronized void fireAndForget(Configuration conf, JSONObject obj) throws Exception {
LOG.info("Submitting: " + obj.toString(2));
String queryId = (String) obj.get("queryId");
try {
BufferedWriter fw = new BufferedWriter(
new FileWriter(new File(dgcDumpDir, queryId), true));
fw.write(obj.toString(2));
fw.flush();
fw.close();
} catch (Exception e) {
LOG.error("Unable to log logical plan to file", e);
}
}
//TODO Do we need this??
//We need to somehow get the sem associated with the plan and
// use it here.
//MySemanticAnaylzer sem = new MySemanticAnaylzer(conf);
//sem.setInputs(plan.getInputs());
//ExplainWork ew = new ExplainWork(null, null, rootTasks,
// plan.getFetchTask(), null, sem,
// false, true, false, false, false);
//JSONObject explainPlan =
// explain.getJSONLogicalPlan(null, ew);
private void analyzeHiveParseTree(ASTNode ast) {
String astStr = ast.dump();
......@@ -319,7 +327,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
try {
BufferedWriter fw = new BufferedWriter(
new FileWriter(new File(dgcDumpDir, "ASTDump"), true));
new FileWriter(new File("/tmp/dgi/", "ASTDump"), true));
fw.write("Full AST Dump" + astStr);
......
......@@ -507,10 +507,13 @@ public class HiveDataModelGenerator {
LOG.debug("Created definition for " + HiveDataTypes.HIVE_PROCESS.getName());
}
public String getModelAsJson() throws MetadataException {
createDataModel();
return getDataModelAsJSON();
}
public static void main(String[] args) throws Exception {
HiveDataModelGenerator hiveDataModelGenerator = new HiveDataModelGenerator();
hiveDataModelGenerator.createDataModel();
String hiveDataModelAsJSON = hiveDataModelGenerator.getDataModelAsJSON();
System.out.println("hiveDataModelAsJSON = " + hiveDataModelAsJSON);
System.out.println("hiveDataModelAsJSON = " + hiveDataModelGenerator.getModelAsJson());
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.typesystem.ITypedStruct;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.Struct;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.repository.IRepository;
import org.apache.hadoop.metadata.typesystem.persistence.Id;
import org.apache.hadoop.metadata.repository.RepositoryException;
import org.apache.hadoop.metadata.typesystem.types.IDataType;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
* todo - this needs to be removed.
*/
@Deprecated
public class HiveImporter {
private static final Logger LOG =
LoggerFactory.getLogger(HiveImporter.class);
private final HiveMetaStoreClient hiveMetastoreClient;
private IRepository repository;
private MetadataRepository graphRepository;
private HiveTypeSystem hiveTypeSystem;
private List<Id> dbInstances;
private List<Id> tableInstances;
private List<Id> partitionInstances;
private List<Id> indexInstances;
private List<Id> columnInstances;
private List<Id> processInstances;
public HiveImporter(MetadataRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc)
throws RepositoryException {
this(hts, hmc);
if (repo == null) {
LOG.error("repository is null");
throw new RuntimeException("repository is null");
}
this.graphRepository = repo;
}
public HiveImporter(IRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc)
throws RepositoryException {
this(hts, hmc);
if (repo == null) {
LOG.error("repository is null");
throw new RuntimeException("repository is null");
}
repository = repo;
repository.defineTypes(hts.getHierarchicalTypeDefinitions());
}
private HiveImporter(HiveTypeSystem hts, HiveMetaStoreClient hmc) {
this.hiveMetastoreClient = hmc;
this.hiveTypeSystem = hts;
dbInstances = new ArrayList<>();
tableInstances = new ArrayList<>();
partitionInstances = new ArrayList<>();
indexInstances = new ArrayList<>();
columnInstances = new ArrayList<>();
processInstances = new ArrayList<>();
}
public List<Id> getDBInstances() {
return dbInstances;
}
public List<Id> getTableInstances() {
return tableInstances;
}
public List<Id> getPartitionInstances() {
return partitionInstances;
}
public List<Id> getColumnInstances() {
return columnInstances;
}
public List<Id> getIndexInstances() {
return indexInstances;
}
public List<Id> getProcessInstances() {
return processInstances;
}
public void importHiveMetadata() throws MetadataException {
LOG.info("Importing hive metadata");
try {
List<String> dbs = hiveMetastoreClient.getAllDatabases();
for (String db : dbs) {
importDatabase(db);
}
} catch (MetaException me) {
throw new MetadataException(me);
}
}
public void importHiveRTInfo(String stmt) throws MetadataException {
}
private boolean usingMemRepository() {
return this.graphRepository == null;
}
private InstancePair createInstance(Referenceable ref)
throws MetadataException {
if (usingMemRepository()) {
return new InstancePair(repository.create(ref), null);
} else {
String typeName = ref.getTypeName();
IDataType dataType = hiveTypeSystem.getDataType(typeName);
LOG.debug("creating instance of type " + typeName + " dataType " + dataType);
ITypedReferenceableInstance instance =
(ITypedReferenceableInstance) dataType.convert(ref, Multiplicity.OPTIONAL);
String guid = graphRepository.createEntity(instance);
System.out.println("creating instance of type " + typeName + " dataType " + dataType
+ ", guid: " + guid);
return new InstancePair(null,
new Referenceable(guid, ref.getTypeName(), ref.getValuesMap()));
}
}
private void setReferenceInstanceAttribute(Referenceable ref, String attr,
InstancePair instance) {
if (usingMemRepository()) {
ref.set(attr, instance.left());
} else {
ref.set(attr, instance.right());
}
}
private void importDatabase(String db) throws MetadataException {
try {
LOG.info("Importing objects from database : " + db);
Database hiveDB = hiveMetastoreClient.getDatabase(db);
Referenceable dbRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_DB.name());
dbRef.set("name", hiveDB.getName());
dbRef.set("description", hiveDB.getDescription());
dbRef.set("locationUri", hiveDB.getLocationUri());
dbRef.set("parameters", hiveDB.getParameters());
dbRef.set("ownerName", hiveDB.getOwnerName());
dbRef.set("ownerType", hiveDB.getOwnerType().getValue());
InstancePair dbRefTyped = createInstance(dbRef);
if (usingMemRepository()) {
dbInstances.add(dbRefTyped.left().getId());
}
importTables(db, dbRefTyped);
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importTables(String db, InstancePair dbRefTyped) throws MetadataException {
try {
List<String> hiveTables = hiveMetastoreClient.getAllTables(db);
for (String table : hiveTables) {
importTable(db, table, dbRefTyped);
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importTable(String db, String table, InstancePair dbRefTyped)
throws MetadataException {
try {
LOG.info("Importing objects from " + db + "." + table);
Table hiveTable = hiveMetastoreClient.getTable(db, table);
Referenceable tableRef = new Referenceable(
HiveTypeSystem.DefinedTypes.HIVE_TABLE.name());
setReferenceInstanceAttribute(tableRef, "dbName", dbRefTyped);
tableRef.set("tableName", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
tableRef.set("createTime", hiveTable.getCreateTime());
tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
tableRef.set("retention", hiveTable.getRetention());
StorageDescriptor storageDesc = hiveTable.getSd();
InstancePair sdRefTyped = fillStorageDescStruct(storageDesc);
setReferenceInstanceAttribute(tableRef, "sd", sdRefTyped);
List<InstancePair> partKeys = new ArrayList<>();
Referenceable colRef;
if (hiveTable.getPartitionKeysSize() > 0) {
for (FieldSchema fs : hiveTable.getPartitionKeys()) {
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
InstancePair colRefTyped = createInstance(colRef);
partKeys.add(colRefTyped);
}
if (usingMemRepository()) {
List<ITypedReferenceableInstance> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.left());
}
tableRef.set("partitionKeys", keys);
} else {
List<Referenceable> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.right());
}
tableRef.set("partitionKeys", keys);
}
}
tableRef.set("parameters", hiveTable.getParameters());
if (hiveTable.isSetViewOriginalText()) {
tableRef.set("viewOriginalText", hiveTable.getViewOriginalText());
}
if (hiveTable.isSetViewExpandedText()) {
tableRef.set("viewExpandedText", hiveTable.getViewExpandedText());
}
tableRef.set("tableType", hiveTable.getTableType());
tableRef.set("temporary", hiveTable.isTemporary());
InstancePair tableRefTyped = createInstance(tableRef);
if (usingMemRepository()) {
tableInstances.add(tableRefTyped.left().getId());
}
importPartitions(db, table, dbRefTyped, tableRefTyped, sdRefTyped);
List<Index> indexes = hiveMetastoreClient.listIndexes(db, table, Short.MAX_VALUE);
if (indexes.size() > 0) {
for (Index index : indexes) {
importIndexes(db, table, dbRefTyped, tableRef);
}
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importPartitions(String db, String table, InstancePair dbRefTyped,
InstancePair tableRefTyped, InstancePair sdRefTyped)
throws MetadataException {
try {
List<Partition> tableParts = hiveMetastoreClient
.listPartitions(db, table, Short.MAX_VALUE);
if (tableParts.size() > 0) {
for (Partition hivePart : tableParts) {
importPartition(hivePart, dbRefTyped, tableRefTyped, sdRefTyped);
}
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importPartition(Partition hivePart,
InstancePair dbRefTyped, InstancePair tableRefTyped,
InstancePair sdRefTyped)
throws MetadataException {
try {
Referenceable partRef = new Referenceable(
HiveTypeSystem.DefinedTypes.HIVE_PARTITION.name());
partRef.set("values", hivePart.getValues());
setReferenceInstanceAttribute(partRef, "dbName", dbRefTyped);
setReferenceInstanceAttribute(partRef, "tableName", tableRefTyped);
partRef.set("createTime", hivePart.getCreateTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
//sdStruct = fillStorageDescStruct(hivePart.getSd());
// Instead of creating copies of the sdstruct for partitions we are reusing existing
// ones
// will fix to identify partitions with differing schema.
setReferenceInstanceAttribute(partRef, "sd", sdRefTyped);
partRef.set("parameters", hivePart.getParameters());
InstancePair partRefTyped = createInstance(partRef);
if (usingMemRepository()) {
partitionInstances.add(partRefTyped.left().getId());
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importIndexes(String db, String table, InstancePair dbRefTyped,
Referenceable tableRef)
throws MetadataException {
try {
List<Index> indexes = hiveMetastoreClient.listIndexes(db, table, Short.MAX_VALUE);
if (indexes.size() > 0) {
for (Index index : indexes) {
importIndex(index, dbRefTyped, tableRef);
}
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importIndex(Index index,
InstancePair dbRefTyped, Referenceable tableRef)
throws MetadataException {
try {
Referenceable indexRef = new Referenceable(
HiveTypeSystem.DefinedTypes.HIVE_INDEX.name());
indexRef.set("indexName", index.getIndexName());
indexRef.set("indexHandlerClass", index.getIndexHandlerClass());
setReferenceInstanceAttribute(indexRef, "dbName", dbRefTyped);
indexRef.set("createTime", index.getCreateTime());
indexRef.set("lastAccessTime", index.getLastAccessTime());
indexRef.set("origTableName", index.getOrigTableName());
indexRef.set("indexTableName", index.getIndexTableName());
InstancePair sdRefTyped = fillStorageDescStruct(index.getSd());
setReferenceInstanceAttribute(indexRef, "sd", sdRefTyped);
indexRef.set("parameters", index.getParameters());
tableRef.set("deferredRebuild", index.isDeferredRebuild());
InstancePair indexRefTyped = createInstance(indexRef);
if (usingMemRepository()) {
indexInstances.add(indexRefTyped.left().getId());
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private InstancePair fillStorageDescStruct(StorageDescriptor storageDesc) throws Exception {
Referenceable sdRef = new Referenceable(
HiveTypeSystem.DefinedTypes.HIVE_STORAGEDESC.name());
SerDeInfo serdeInfo = storageDesc.getSerdeInfo();
// SkewedInfo skewedInfo = storageDesc.getSkewedInfo();
LOG.debug("Filling storage descriptor information for " + storageDesc);
String serdeInfoName = HiveTypeSystem.DefinedTypes.HIVE_SERDE.name();
Struct serdeInfoStruct = new Struct(serdeInfoName);
serdeInfoStruct.set("name", serdeInfo.getName());
serdeInfoStruct.set("serializationLib", serdeInfo.getSerializationLib());
serdeInfoStruct.set("parameters", serdeInfo.getParameters());
LOG.debug("serdeInfo = " + serdeInfo);
StructType serdeInfotype = (StructType) hiveTypeSystem.getDataType(serdeInfoName);
ITypedStruct serdeInfoStructTyped =
serdeInfotype.convert(serdeInfoStruct, Multiplicity.OPTIONAL);
sdRef.set("serdeInfo", serdeInfoStructTyped);
// Will need to revisit this after we fix typesystem.
//LOG.info("skewedInfo = " + skewedInfo);
//String skewedInfoName = HiveTypeSystem.DefinedTypes.HIVE_SKEWEDINFO.name();
//Struct skewedInfoStruct = new Struct(skewedInfoName);
//if (skewedInfo.getSkewedColNames().size() > 0) {
// skewedInfoStruct.set("skewedColNames", skewedInfo.getSkewedColNames());
// skewedInfoStruct.set("skewedColValues", skewedInfo.getSkewedColValues());
// skewedInfoStruct.set("skewedColValueLocationMaps", skewedInfo
// .getSkewedColValueLocationMaps());
// StructType skewedInfotype = (StructType) hiveTypeSystem.getDataType(skewedInfoName);
// ITypedStruct skewedInfoStructTyped =
// skewedInfotype.convert(skewedInfoStruct, Multiplicity.OPTIONAL);
// sdStruct.set("skewedInfo", skewedInfoStructTyped);
//}
List<InstancePair> fieldsList = new ArrayList<>();
Referenceable colRef;
for (FieldSchema fs : storageDesc.getCols()) {
LOG.debug("Processing field " + fs);
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
InstancePair colRefTyped = createInstance(colRef);
fieldsList.add(colRefTyped);
if (usingMemRepository()) {
columnInstances.add(colRefTyped.left().getId());
}
}
if (usingMemRepository()) {
List<ITypedReferenceableInstance> flds = new ArrayList<>();
for (InstancePair ip : fieldsList) {
flds.add(ip.left());
}
sdRef.set("cols", flds);
} else {
List<Referenceable> flds = new ArrayList<>();
for (InstancePair ip : fieldsList) {
flds.add(ip.right());
}
sdRef.set("cols", flds);
}
List<ITypedStruct> sortColsStruct = new ArrayList<>();
for (Order sortcol : storageDesc.getSortCols()) {
String hiveOrderName = HiveTypeSystem.DefinedTypes.HIVE_ORDER.name();
Struct colStruct = new Struct(hiveOrderName);
colStruct.set("col", sortcol.getCol());
colStruct.set("order", sortcol.getOrder());
StructType sortColType = (StructType) hiveTypeSystem.getDataType(hiveOrderName);
ITypedStruct sortColTyped =
sortColType.convert(colStruct, Multiplicity.OPTIONAL);
sortColsStruct.add(sortColTyped);
}
sdRef.set("location", storageDesc.getLocation());
sdRef.set("inputFormat", storageDesc.getInputFormat());
sdRef.set("outputFormat", storageDesc.getOutputFormat());
sdRef.set("compressed", storageDesc.isCompressed());
if (storageDesc.getBucketCols().size() > 0) {
sdRef.set("bucketCols", storageDesc.getBucketCols());
}
if (sortColsStruct.size() > 0) {
sdRef.set("sortCols", sortColsStruct);
}
sdRef.set("parameters", storageDesc.getParameters());
sdRef.set("storedAsSubDirectories", storageDesc.isStoredAsSubDirectories());
InstancePair sdRefTyped = createInstance(sdRef);
return sdRefTyped;
}
private class Pair<L, R> {
final L left;
final R right;
public Pair(L left, R right) {
this.left = left;
this.right = right;
}
public L left() {
return this.left;
}
public R right() {
return this.right;
}
}
private class InstancePair extends Pair<ITypedReferenceableInstance, Referenceable> {
public InstancePair(ITypedReferenceableInstance left, Referenceable right) {
super(left, right);
}
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.typesystem.types.AttributeDefinition;
import org.apache.hadoop.metadata.typesystem.types.ClassType;
import org.apache.hadoop.metadata.typesystem.types.DataTypes;
import org.apache.hadoop.metadata.typesystem.types.EnumTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.EnumValue;
import org.apache.hadoop.metadata.typesystem.types.HierarchicalType;
import org.apache.hadoop.metadata.typesystem.types.HierarchicalTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.IDataType;
import org.apache.hadoop.metadata.typesystem.types.Multiplicity;
import org.apache.hadoop.metadata.typesystem.types.StructTypeDefinition;
import org.apache.hadoop.metadata.typesystem.types.TraitType;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* todo: remove this.
*/
@Deprecated
public class HiveTypeSystem {
private static final Logger LOG =
LoggerFactory.getLogger(HiveTypeSystem.class);
private boolean valid = false;
private Map<String, HierarchicalTypeDefinition<ClassType>> classTypeDefinitions;
private Map<String, EnumTypeDefinition> enumTypeDefinitionMap;
private Map<String, StructTypeDefinition> structTypeDefinitionMap;
private DataTypes.MapType mapStrToStrMap;
private DataTypes.ArrayType strArrayType;
private Map<String, IDataType> typeMap;
private List<IDataType> enumTypes;
private HiveTypeSystem() {
classTypeDefinitions = new HashMap<>();
enumTypeDefinitionMap = new HashMap<>();
structTypeDefinitionMap = new HashMap<>();
typeMap = new HashMap<>();
enumTypes = new ArrayList<>();
}
public synchronized static HiveTypeSystem getInstance() throws MetadataException {
HiveTypeSystem hs = Holder.instance;
if (hs.valid) {
LOG.info("Returning pre-initialized HiveTypeSystem singleton");
return hs;
}
hs.initialize();
return hs;
}
// private static Multiplicity ZeroOrMore = new Multiplicity(0, Integer.MAX_VALUE, true);
private void initialize() throws MetadataException {
LOG.info("Initializing the Hive Typesystem");
TypeSystem typeSystem = TypeSystem.getInstance();
mapStrToStrMap =
typeSystem.defineMapType(DataTypes.STRING_TYPE, DataTypes.STRING_TYPE);
strArrayType = typeSystem.defineArrayType(DataTypes.STRING_TYPE);
createHiveObjectTypeEnum();
createHivePrincipalTypeEnum();
createFunctionTypeEnum();
createResourceTypeEnum();
createSerDeStruct();
//createSkewedInfoStruct();
createOrderStruct();
createResourceUriStruct();
createStorageDescClass();
createDBClass();
createTypeClass();
createColumnClass();
createPartitionClass();
createTableClass();
createIndexClass();
createFunctionClass();
createRoleClass();
createProcessClass();
for (EnumTypeDefinition def : getEnumTypeDefinitions()) {
enumTypes.add(typeSystem.defineEnumType(def));
}
typeMap.putAll(
typeSystem.defineTypes(getStructTypeDefinitions(), getTraitTypeDefinitions(),
getClassTypeDefinitions()));
valid = true;
}
Map<String, IDataType> getTypeMap() {
return typeMap;
}
public IDataType getDataType(String typeName) {
return typeMap.get(typeName);
}
public ImmutableList<HierarchicalType> getHierarchicalTypeDefinitions() {
if (valid) {
return ImmutableList.of(
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_DB.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_STORAGEDESC.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_TABLE.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_COLUMN.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_PARTITION.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_INDEX.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_FUNCTION.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_ROLE.name()),
(HierarchicalType) typeMap.get(DefinedTypes.HIVE_PROCESS.name())
);
} else {
return ImmutableList.of();
}
}
public ImmutableList<EnumTypeDefinition> getEnumTypeDefinitions() {
return ImmutableList.copyOf(enumTypeDefinitionMap.values());
}
public ImmutableList<StructTypeDefinition> getStructTypeDefinitions() {
return ImmutableList.copyOf(structTypeDefinitionMap.values());
}
public ImmutableList<HierarchicalTypeDefinition<ClassType>> getClassTypeDefinitions() {
return ImmutableList.copyOf(classTypeDefinitions.values());
}
public ImmutableList<HierarchicalTypeDefinition<TraitType>> getTraitTypeDefinitions() {
return ImmutableList.of();
}
private void createHiveObjectTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("GLOBAL", 1),
new EnumValue("DATABASE", 2),
new EnumValue("TABLE", 3),
new EnumValue("PARTITION", 4),
new EnumValue("COLUMN", 5),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_OBJECTTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_OBJECTTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_OBJECTTYPE.name());
}
private void createHivePrincipalTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("USER", 1),
new EnumValue("ROLE", 2),
new EnumValue("GROUP", 3),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_PRINCIPALTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_PRINCIPALTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_PRINCIPALTYPE.name());
}
private void createFunctionTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("JAVA", 1),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_FUNCTIONTYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_FUNCTIONTYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_FUNCTIONTYPE.name());
}
private void createResourceTypeEnum() throws MetadataException {
EnumValue values[] = {
new EnumValue("JAR", 1),
new EnumValue("FILE", 2),
new EnumValue("ARCHIVE", 3),
};
EnumTypeDefinition definition = new EnumTypeDefinition(
DefinedTypes.HIVE_RESOURCETYPE.name(), values);
enumTypeDefinitionMap.put(DefinedTypes.HIVE_RESOURCETYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_RESOURCETYPE.name());
}
private void createSerDeStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("serializationLib", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_SERDE.name(),
attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_SERDE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_SERDE.name());
}
/**
private void createSkewedInfoStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("skewedColNames", String.format("array<%s>", DataTypes.STRING_TYPE
.getName()),
ZeroOrMore, false, null),
new AttributeDefinition("skewedColValues", String.format("array<%s>", strArrayType.getName()),
ZeroOrMore, false, null),
new AttributeDefinition("skewedColValueLocationMaps", mapStrToStrMap.getName(), Multiplicity
.OPTIONAL, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_SKEWEDINFO.name
(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_SKEWEDINFO.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_SKEWEDINFO.name());
}
**/
private void createOrderStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("col", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("order", DataTypes.INT_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(DefinedTypes.HIVE_ORDER.name(),
attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_ORDER.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_ORDER.name());
}
private void createStorageDescClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("cols",
String.format("array<%s>", DefinedTypes.HIVE_COLUMN.name()),
Multiplicity.COLLECTION, false, null),
new AttributeDefinition("location", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("inputFormat", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("outputFormat", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("compressed", DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("numBuckets", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("serdeInfo", DefinedTypes.HIVE_SERDE.name(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("bucketCols",
String.format("array<%s>", DataTypes.STRING_TYPE.getName()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sortCols",
String.format("array<%s>", DefinedTypes.HIVE_ORDER.name()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
//new AttributeDefinition("skewedInfo", DefinedTypes.HIVE_SKEWEDINFO.name(),
// Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("storedAsSubDirectories", DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class,
DefinedTypes.HIVE_STORAGEDESC.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_STORAGEDESC.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_STORAGEDESC.name());
}
/** Revisit later after nested array types are handled by the typesystem **/
private void createResourceUriStruct() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("resourceType", DefinedTypes.HIVE_RESOURCETYPE.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("uri", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
};
StructTypeDefinition definition = new StructTypeDefinition(
DefinedTypes.HIVE_RESOURCEURI.name(), attributeDefinitions);
structTypeDefinitionMap.put(DefinedTypes.HIVE_RESOURCEURI.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_RESOURCEURI.name());
}
private void createDBClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("description", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("locationUri", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerName", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerType", DefinedTypes.HIVE_PRINCIPALTYPE.name(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_DB.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_DB.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_DB.name());
}
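/** Registers the class type for a Hive type definition (name, type1/type2 and nested fields). */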
private void createTypeClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("type1", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("type2", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("fields", String.format("array<%s>",
DefinedTypes.HIVE_COLUMN.name()), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_TYPE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_TYPE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_TYPE.name());
}
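/** Registers the class type for a Hive column (name, type string and comment). */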
private void createColumnClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("name", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
//new AttributeDefinition("type", DefinedTypes.HIVE_TYPE.name(), Multiplicity
// .REQUIRED, false, null),
new AttributeDefinition("type", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("comment", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_COLUMN.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_COLUMN.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_COLUMN.name());
}
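/** Registers the class type for a Hive partition (partition values, owning db/table and storage descriptor). */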
private void createPartitionClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("values", DataTypes.STRING_TYPE.getName(),
Multiplicity.COLLECTION, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("tableName", DefinedTypes.HIVE_TABLE.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(),
Multiplicity.REQUIRED, false, null),
//new AttributeDefinition("columns", String.format("array<%s>", DefinedTypes
// .HIVE_COLUMN.name()),
// Multiplicity.COLLECTION, true, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class,
DefinedTypes.HIVE_PARTITION.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_PARTITION.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_PARTITION.name());
}
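/** Registers the class type for a Hive table (identity, timestamps, storage descriptor, partition keys and view text). */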
private void createTableClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("tableName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("owner", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("retention", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("partitionKeys",
String.format("array<%s>", DefinedTypes.HIVE_COLUMN.name()),
Multiplicity.OPTIONAL, false, null),
//new AttributeDefinition("columns", String.format("array<%s>", DefinedTypes
// .HIVE_COLUMN.name()),
// Multiplicity.COLLECTION, true, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("viewOriginalText", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("viewExpandedText", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("tableType", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("temporary", DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_TABLE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_TABLE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_TABLE.name());
}
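/** Registers the class type for a Hive index (handler class, original and index tables, storage descriptor). */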
private void createIndexClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("indexName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("indexHandlerClass", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("lastAccessTime", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("origTableName", DefinedTypes.HIVE_TABLE.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("indexTableName", DefinedTypes.HIVE_TABLE.name(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", DefinedTypes.HIVE_STORAGEDESC.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("parameters", mapStrToStrMap.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("deferredRebuild", DataTypes.BOOLEAN_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_INDEX.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_INDEX.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_INDEX.name());
}
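/** Registers the class type for a Hive function (owning db, implementing class, owner and resource URIs). */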
private void createFunctionClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("functionName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("dbName", DefinedTypes.HIVE_DB.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("className", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerName", DataTypes.INT_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("ownerType", DefinedTypes.HIVE_PRINCIPALTYPE.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("functionType", DefinedTypes.HIVE_FUNCTIONTYPE.name(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("resourceUris", DefinedTypes.HIVE_RESOURCEURI.name(),
Multiplicity.COLLECTION, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_FUNCTION.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_FUNCTION.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_FUNCTION.name());
}
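/** Registers the class type for a Hive role (role name, creation time and owner). */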
private void createRoleClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("roleName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("createTime", DataTypes.INT_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("ownerName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_ROLE.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_ROLE.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_ROLE.name());
}
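/** Registers the class type for a Hive process, i.e. a query execution that links source and target tables. */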
private void createProcessClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("processName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("startTime", DataTypes.INT_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("endTime", DataTypes.INT_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("userName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("sourceTableNames",
String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("targetTableNames",
String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("queryText", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryPlan", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryId", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryGraph", DataTypes.STRING_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_PROCESS.name(),
null, attributeDefinitions);
classTypeDefinitions.put(DefinedTypes.HIVE_PROCESS.name(), definition);
LOG.debug("Created definition for " + DefinedTypes.HIVE_PROCESS.name());
}
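/** Names of all enum, struct and class types registered by this type system. */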
public enum DefinedTypes {
// Enums
HIVE_OBJECTTYPE,
HIVE_PRINCIPALTYPE,
HIVE_RESOURCETYPE,
HIVE_FUNCTIONTYPE,
// Structs
HIVE_SERDE,
HIVE_SKEWEDINFO,
HIVE_ORDER,
HIVE_RESOURCEURI,
// Classes
HIVE_DB,
HIVE_STORAGEDESC,
HIVE_TABLE,
HIVE_COLUMN,
HIVE_PARTITION,
HIVE_INDEX,
HIVE_FUNCTION,
HIVE_ROLE,
HIVE_TYPE,
HIVE_PROCESS,
//HIVE_VIEW,
}
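/** Initialization-on-demand holder for the singleton HiveTypeSystem instance. */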
public static final class Holder {
public static final HiveTypeSystem instance = new HiveTypeSystem();
}
}
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://c6501.ambari.apache.org:8020</value>
<final>true</final>
</property>
<property>
<name>fs.trash.interval</name>
<value>360</value>
</property>
<property>
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
<value>true</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>
RULE:[2:$1@$0]([rn]m@.*)s/.*/yarn/
RULE:[2:$1@$0](jhs@.*)s/.*/mapred/
RULE:[2:$1@$0]([nd]n@.*)s/.*/hdfs/
RULE:[2:$1@$0](hm@.*)s/.*/hbase/
RULE:[2:$1@$0](rs@.*)s/.*/hbase/
DEFAULT
</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec
</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>io.serializations</name>
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>50</value>
</property>
<property>
<name>ipc.client.connection.maxidletime</name>
<value>30000</value>
</property>
<property>
<name>ipc.client.idlethreshold</name>
<value>8000</value>
</property>
<property>
<name>ipc.server.tcpnodelay</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobtracker.webinterface.trusted</name>
<value>false</value>
</property>
<property>
<name>proxyuser_group</name>
<value>users</value>
</property>
</configuration>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Wed Feb 4 03:23:35 2015-->
<configuration>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.blockreport.initialDelay</name>
<value>120</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.size</name>
<value>4096</value>
</property>
<property>
<name>dfs.cluster.administrators</name>
<value>hdfs</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
</property>
<property>
<name>dfs.datanode.balance.bandwidthPerSec</name>
<value>6250000</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>750</value>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>1073741824</value>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value>0</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
</property>
<property>
<name>dfs.datanode.https.address</name>
<value>0.0.0.0:50475</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:8010</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>4096</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
<property>
<name>dfs.heartbeat.interval</name>
<value>3</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/etc/hadoop/conf/dfs.exclude</value>
</property>
<property>
<name>dfs.http.policy</name>
<value>HTTP_ONLY</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/hadoop/hdfs/journalnode</value>
</property>
<property>
<name>dfs.journalnode.http-address</name>
<value>0.0.0.0:8480</value>
</property>
<property>
<name>dfs.namenode.accesstime.precision</name>
<value>0</value>
</property>
<property>
<name>dfs.namenode.avoid.read.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.avoid.write.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/hadoop/hdfs/namesecondary</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>${dfs.namenode.checkpoint.dir}</value>
</property>
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>21600</value>
</property>
<property>
<name>dfs.namenode.checkpoint.txns</name>
<value>1000000</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>40</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>c6501.ambari.apache.org:50070</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.https-address</name>
<value>c6501.ambari.apache.org:50470</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.name.dir.restore</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.safemode.threshold-pct</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>c6501.ambari.apache.org:50090</value>
</property>
<property>
<name>dfs.namenode.stale.datanode.interval</name>
<value>30000</value>
</property>
<property>
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
<value>3600</value>
</property>
<property>
<name>dfs.namenode.write.stale.datanode.ratio</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hdfs</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.replication.max</name>
<value>50</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
<final>true</final>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
</configuration>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Wed Feb 4 03:19:28 2015-->
<configuration>
<property>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>none</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>238026752</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
<value>false</value>
</property>
<property>
<name>hive.cbo.enable</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>0</value>
</property>
<property>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<value>true</value>
</property>
<property>
<name>hive.conf.restricted.list</name>
<value>
hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role
</value>
</property>
<property>
<name>hive.convert.join.bucket.mapjoin.tez</name>
<value>false</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.sorting</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
</property>
<property>
<name>hive.exec.compress.output</name>
<value>false</value>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.exec.failure.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.max.created.files</name>
<value>100000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions</name>
<value>5000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>2000</value>
</property>
<property>
<name>hive.exec.orc.compression.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.orc.default.compress</name>
<value>ZLIB</value>
</property>
<property>
<name>hive.exec.orc.default.stripe.size</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
</property>
<property>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>1009</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive</value>
</property>
<property>
<name>hive.exec.submit.local.task.via.child</name>
<value>true</value>
</property>
<property>
<name>hive.exec.submitviachild</name>
<value>false</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.fetch.task.aggr</name>
<value>false</value>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>more</value>
</property>
<property>
<name>hive.fetch.task.conversion.threshold</name>
<value>1073741824</value>
</property>
<property>
<name>hive.heapsize</name>
<value>1024</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
</property>
<property>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
</property>
<property>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
</property>
<property>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
</property>
<property>
<name>hive.mapjoin.bucket.cache.size</name>
<value>10000</value>
</property>
<property>
<name>hive.mapjoin.optimized.hashtable</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
</property>
<property>
<name>hive.merge.mapredfiles</name>
<value>false</value>
</property>
<property>
<name>hive.merge.orcfile.stripe.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.rcfile.block.level</name>
<value>true</value>
</property>
<property>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
</property>
<property>
<name>hive.merge.tezfiles</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5s</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800s</value>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.failure.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive/_HOST@EXAMPLE.COM</value>
</property>
<property>
<name>hive.metastore.pre.event.listeners</name>
<value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener
</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://c6501.ambari.apache.org:9083</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/apps/hive/warehouse</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<value>false</value>
</property>
<property>
<name>hive.optimize.constant.propagation</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.index.filter</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.mapjoin.mapreduce</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.metadataonly</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.null.scan</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
</property>
<property>
<name>hive.optimize.sort.dynamic.partition</name>
<value>false</value>
</property>
<property>
<name>hive.orc.compute.splits.num.threads</name>
<value>10</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.numcontainers</name>
<value>10</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>
org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory
</value>
</property>
<property>
<name>hive.security.metastore.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
</property>
<property>
<name>hive.security.metastore.authorization.auth.reads</name>
<value>true</value>
</property>
<property>
<name>hive.security.metastore.authorization.manager</name>
<value>
org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider,org.apache.hadoop.hive.ql.security.authorization.MetaStoreAuthzAPIAuthorizerEmbedOnly
</value>
</property>
<property>
<name>hive.server2.allow.user.substitution</name>
<value>true</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NONE</value>
</property>
<property>
<name>hive.server2.authentication.spnego.keytab</name>
<value>HTTP/_HOST@EXAMPLE.COM</value>
</property>
<property>
<name>hive.server2.authentication.spnego.principal</name>
<value>/etc/security/keytabs/spnego.service.keytab</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.server2.enable.impersonation</name>
<value>true</value>
</property>
<property>
<name>hive.server2.logging.operation.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>
<property>
<name>hive.server2.table.type.mapping</name>
<value>CLASSIC</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<value>false</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>500</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.sasl.qop</name>
<value>auth</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
</property>
<property>
<name>hive.server2.use.SSL</name>
<value>false</value>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2</value>
</property>
<property>
<name>hive.smbjoin.cache.rows</name>
<value>10000</value>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
</property>
<property>
<name>hive.stats.dbclass</name>
<value>fs</value>
</property>
<property>
<name>hive.stats.fetch.column.stats</name>
<value>false</value>
</property>
<property>
<name>hive.stats.fetch.partition.stats</name>
<value>true</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
</property>
<property>
<name>hive.tez.auto.reducer.parallelism</name>
<value>false</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>682</value>
</property>
<property>
<name>hive.tez.cpu.vcores</name>
<value>-1</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning</name>
<value>true</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.data.size</name>
<value>104857600</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.event.size</name>
<value>1048576</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx546m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA
-XX:+UseParallelGC -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps
</value>
</property>
<property>
<name>hive.tez.log.level</name>
<value>INFO</value>
</property>
<property>
<name>hive.tez.max.partition.factor</name>
<value>2.0</value>
</property>
<property>
<name>hive.tez.min.partition.factor</name>
<value>0.25</value>
</property>
<property>
<name>hive.tez.smb.number.waves</name>
<value>0.5</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
</property>
<property>
<name>hive.txn.max.open.batch</name>
<value>1000</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
</property>
<property>
<name>hive.user.install.directory</name>
<value>/user/</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.execution.reduce.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>4096</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>0.1</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>100000</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://c6501.ambari.apache.org/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
</configuration>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>mapreduce.admin.map.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}
</value>
</property>
<property>
<name>mapreduce.admin.reduce.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}
</value>
</property>
<property>
<name>mapreduce.admin.user.env</name>
<value>
LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64
</value>
</property>
<property>
<name>mapreduce.am.max-attempts</name>
<value>2</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure
</value>
</property>
<property>
<name>mapreduce.application.framework.path</name>
<value>/hdp/apps/${hdp.version}/mapreduce/mapreduce.tar.gz#mr-framework</value>
</property>
<property>
<name>mapreduce.cluster.administrators</name>
<value>hadoop</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.emit-timeline-data</name>
<value>false</value>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>0.05</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>c6501.ambari.apache.org:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/mr-history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/mr-history/tmp</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>c6501.ambari.apache.org:19888</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.map.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.map.sort.spill.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress.type</name>
<value>BLOCK</value>
</property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.0</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.reduce.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.enabled</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.interval-ms</name>
<value>1000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.timeout-ms</name>
<value>30000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.merge.percent</name>
<value>0.66</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>30</value>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>13562</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>273</value>
</property>
<property>
<name>mapreduce.task.timeout</name>
<value>300000</value>
</property>
<property>
<name>yarn.app.mapreduce.am.admin-command-opts</name>
<value>-Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx546m -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.log.level</name>
<value>INFO</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>682</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
</configuration>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>hadoop.registry.rm.enabled</name>
<value>true</value>
</property>
<property>
<name>hadoop.registry.zk.quorum</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>yarn.acl.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.admin.acl</name>
<value></value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,/usr/hdp/current/hadoop-client/*,/usr/hdp/current/hadoop-client/lib/*,/usr/hdp/current/hadoop-hdfs-client/*,/usr/hdp/current/hadoop-hdfs-client/lib/*,/usr/hdp/current/hadoop-yarn-client/*,/usr/hdp/current/hadoop-yarn-client/lib/*
</value>
</property>
<property>
<name>yarn.client.nodemanager-connect.max-wait-ms</name>
<value>900000</value>
</property>
<property>
<name>yarn.client.nodemanager-connect.retry-interval-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>2592000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://c6501.ambari.apache.org:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.node-labels.fs-store.retry-policy-spec</name>
<value>2000, 500</value>
</property>
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>/system/yarn/node-labels</value>
</property>
<property>
<name>yarn.node-labels.manager-class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.nodelabels.MemoryRMNodeLabelsManager
</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:45454</value>
</property>
<property>
<name>yarn.nodemanager.admin-env</name>
<value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.nodemanager.container-executor.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value>
</property>
<property>
<name>yarn.nodemanager.container-monitor.interval-ms</name>
<value>3000</value>
</property>
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>0</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
<value>90</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
<value>1000</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
<value>0.25</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.interval-ms</name>
<value>135000</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.script.timeout-ms</name>
<value>60000</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
<value>hadoop-yarn</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.group</name>
<value>hadoop</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/hadoop/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.compression-type</name>
<value>gz</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.debug-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.num-log-files-per-app</name>
<value>30</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>-1</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/hadoop/yarn/log</value>
</property>
<property>
<name>yarn.nodemanager.log.retain-second</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>/var/log/hadoop-yarn/nodemanager/recovery-state</value>
</property>
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/app-logs</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
<value>100</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>c6501.ambari.apache.org:8050</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>c6501.ambari.apache.org:8141</value>
</property>
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>2</value>
</property>
<property>
<name>yarn.resourcemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.resourcemanager.connect.max-wait.ms</name>
<value>900000</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>30000</value>
</property>
<property>
<name>yarn.resourcemanager.fs.state-store.retry-policy-spec</name>
<value>2000, 500</value>
</property>
<property>
<name>yarn.resourcemanager.fs.state-store.uri</name>
<value></value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/etc/hadoop/conf/yarn.exclude</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>c6501.ambari.apache.org:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>c6501.ambari.apache.org:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
</value>
</property>
<property>
<name>yarn.resourcemanager.state-store.max-completed-applications</name>
<value>${yarn.resourcemanager.max-completed-applications}</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size</name>
<value>10</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>c6501.ambari.apache.org:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-acl</name>
<value>world:anyone:rwcda</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>localhost:2181</value>
</property>
<property>
<name>yarn.resourcemanager.zk-num-retries</name>
<value>1000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-retry-interval-ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-state-store.parent-path</name>
<value>/rmstore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-timeout-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>682</value>
</property>
<property>
<name>yarn.timeline-service.address</name>
<value>c6501.ambari.apache.org:10200</value>
</property>
<property>
<name>yarn.timeline-service.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.timeline-service.client.max-retries</name>
<value>30</value>
</property>
<property>
<name>yarn.timeline-service.client.retry-interval-ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.store-class</name>
<value>org.apache.hadoop.yarn.server.applicationhistoryservice.NullApplicationHistoryStore
</value>
</property>
<property>
<name>yarn.timeline-service.http-authentication.simple.anonymous.allowed</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.http-authentication.type</name>
<value>simple</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.path</name>
<value>/hadoop/yarn/timeline</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.read-cache-size</name>
<value>104857600</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size</name>
<value>10000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size</name>
<value>10000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms</name>
<value>300000</value>
</property>
<property>
<name>yarn.timeline-service.store-class</name>
<value>org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore</value>
</property>
<property>
<name>yarn.timeline-service.ttl-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.ttl-ms</name>
<value>2678400000</value>
</property>
<property>
<name>yarn.timeline-service.webapp.address</name>
<value>c6501.ambari.apache.org:8188</value>
</property>
<property>
<name>yarn.timeline-service.webapp.https.address</name>
<value>c6501.ambari.apache.org:8190</value>
</property>
</configuration>
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hive.hook;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.TypesDef;
import org.apache.hadoop.metadata.typesystem.json.TypesSerialization;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
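/**
 * Integration test for the Hive hook: runs Hive commands through an embedded Driver with the hook
 * registered as a post-execution hook and verifies the resulting entities via the metadata service.
 */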
public class HiveHookIT {
private static final String DGI_URL = "http://localhost:21000/";
private Driver driver;
private MetadataServiceClient dgiClient;
private SessionState ss;
@BeforeClass
public void setUp() throws Exception {
//Register hive types
HiveDataModelGenerator hiveModel = new HiveDataModelGenerator();
hiveModel.createDataModel();
TypesDef typesDef = hiveModel.getTypesDef();
String typesAsJson = TypesSerialization.toJson(typesDef);
dgiClient = new MetadataServiceClient(DGI_URL);
try {
dgiClient.createType(typesAsJson);
} catch (Exception e) {
//ignore if types are already defined
}
//Set-up hive session
HiveConf conf = getHiveConf();
driver = new Driver(conf);
ss = new SessionState(conf);
ss = SessionState.start(ss);
SessionState.setCurrentSessionState(ss);
}
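/** Builds a HiveConf that registers HiveHook as the post-execution hook and points HiveMetaStoreBridge at DGI_URL. */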
private HiveConf getHiveConf() {
HiveConf hiveConf = new HiveConf(this.getClass());
hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, "");
hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, HiveHook.class.getName());
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, System.getProperty("user.dir") + "/target/metastore");
hiveConf.set(HiveMetaStoreBridge.DGI_URL_PROPERTY, DGI_URL);
hiveConf.set("debug", "true");
return hiveConf;
}
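/** Runs a single Hive command through the embedded Driver. */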
private void runCommand(String cmd) throws Exception {
ss.setCommandType(null);
driver.run(cmd);
}
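/** Creates a database and asserts that a matching HIVE_DB entity can be found through the metadata service. */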
@Test
public void testCreateDatabase() throws Exception {
String dbName = "db" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
runCommand("create database " + dbName);
String typeName = HiveDataTypes.HIVE_DB.getName();
JSONObject result = dgiClient.search(typeName, "name", dbName);
JSONArray results = (JSONArray) result.get("results");
Assert.assertEquals(results.length(), 1);
JSONObject resultRow = (JSONObject) results.get(0);
Assert.assertEquals(resultRow.get(typeName + ".name"), dbName);
}
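/** Disabled: exercises CREATE TABLE, SELECT and CTAS statements without yet asserting on the registered entities. */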
@Test(enabled = false)
public void testCreateTable() throws Exception {
String dbName = "db" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
runCommand("create database " + dbName);
String tableName = "table" + RandomStringUtils.randomAlphanumeric(5);
String queryStr = String.format("create table %s.%s(id int, name string)", dbName, tableName);
runCommand(queryStr);
String defaultTableName = "table" + RandomStringUtils.randomAlphanumeric(5);
runCommand("create table " + defaultTableName + "(id int, name string)");
runCommand("select * from " + defaultTableName);
runCommand("select * from " + dbName + "." + tableName);
String newTableName = "table" + RandomStringUtils.randomAlphanumeric(5);
runCommand("create table " + newTableName + " as select * from " + defaultTableName);
runCommand("create table " + dbName + "." + newTableName + " as select * from " + dbName + "." + tableName);
newTableName = "table" + RandomStringUtils.randomAlphanumeric(5);
runCommand("create table " + newTableName + " as select count(*) from " + defaultTableName);
}
}
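As the hook covers more operations, the search-and-assert steps in testCreateDatabase could be factored into a helper. A minimal sketch, assuming it lives inside HiveHookIT and reuses the dgiClient field; the helper name assertDatabaseIsRegistered is hypothetical:

    private void assertDatabaseIsRegistered(String dbName) throws Exception {
        String typeName = HiveDataTypes.HIVE_DB.getName();
        JSONObject result = dgiClient.search(typeName, "name", dbName);
        JSONArray results = (JSONArray) result.get("results");
        Assert.assertEquals(results.length(), 1);
        Assert.assertEquals(((JSONObject) results.get(0)).get(typeName + ".name"), dbName);
    }

testCreateDatabase would then reduce to the create statement plus a single assertDatabaseIsRegistered(dbName) call.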
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import com.thinkaurelius.titan.core.TitanGraph;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Vertex;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.repository.graph.GraphBackedMetadataRepository;
import org.apache.hadoop.metadata.repository.graph.GraphHelper;
import org.apache.hadoop.metadata.repository.graph.TitanGraphProvider;
import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.List;
@Test(enabled = false)
public class HiveGraphRepositoryTest {
private static final Logger LOG =
LoggerFactory.getLogger(HiveGraphRepositoryTest.class);
protected HiveTypeSystem hts;
private GraphBackedMetadataRepository repository;
private TitanGraph graph;
@BeforeClass
public void setup() throws ConfigurationException, MetadataException {
final TitanGraphProvider titanGraphProvider = new TitanGraphProvider();
graph = titanGraphProvider.get();
repository = new GraphBackedMetadataRepository(titanGraphProvider);
hts = HiveTypeSystem.getInstance();
}
@AfterClass
public void tearDown() {
System.out.println("*******************Graph Dump****************************");
System.out.println("Vertices of " + graph);
for (Vertex vertex : graph.getVertices()) {
System.out.println(GraphHelper.vertexString(vertex));
}
System.out.println("Edges of " + graph);
for (Edge edge : graph.getEdges()) {
System.out.println(GraphHelper.edgeString(edge));
}
System.out.println("*******************Graph Dump****************************");
}
@Test(enabled = false)
public void testHiveImport() throws Exception {
HiveConf conf = new HiveConf();
        HiveMetaStoreClient hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(repository, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
f.getParentFile().mkdirs();
FileWriter fw = new FileWriter(f);
BufferedWriter bw = new BufferedWriter(fw);
List<String> idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_DB.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
LOG.info("Defined Table instances");
idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_TABLE.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
LOG.info("Defined Partition instances");
idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_PARTITION.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
LOG.info("Defined Column instances");
idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
LOG.info("Defined Index instances");
idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_INDEX.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
LOG.info("Defined Process instances");
idList =
repository.getEntityList(HiveTypeSystem.DefinedTypes.HIVE_PROCESS.name());
for (String id : idList) {
ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
bw.write(instance.toString());
}
bw.flush();
bw.close();
}
}
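The per-type dump blocks in testHiveImport repeat the same loop six times; a sketch of a helper that would collapse them, assuming it lives inside HiveGraphRepositoryTest (the name dumpEntities is hypothetical):

    private void dumpEntities(HiveTypeSystem.DefinedTypes type, BufferedWriter bw) throws Exception {
        LOG.info("Defined {} instances", type.name());
        for (String id : repository.getEntityList(type.name())) {
            ITypedReferenceableInstance instance = repository.getEntityDefinition(id);
            bw.write(instance.toString());
        }
    }

The body of testHiveImport would then be a sequence of calls such as dumpEntities(HiveTypeSystem.DefinedTypes.HIVE_DB, bw).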
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.typesystem.persistence.Id;
import org.apache.hadoop.metadata.repository.memory.MemRepository;
import org.apache.hadoop.metadata.typesystem.types.TypeSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
@Test(enabled = true)
public class HiveTypeSystemTest {
private static final String hiveHost = "c6501.ambari.apache.org";
private static final short hivePort = 10000;
private static final Logger LOG =
LoggerFactory.getLogger(HiveTypeSystemTest.class);
protected MemRepository mr;
protected HiveTypeSystem hts;
@BeforeClass
public void setup() throws MetadataException {
TypeSystem ts = TypeSystem.getInstance();
ts.reset();
mr = new MemRepository(ts);
hts = HiveTypeSystem.getInstance();
}
@Test(enabled = true)
public void testHiveImport() throws MetaException, MetadataException, IOException {
HiveConf conf = new HiveConf();
        HiveMetaStoreClient hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(mr, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
f.getParentFile().mkdirs();
FileWriter fw = new FileWriter(f);
BufferedWriter bw = new BufferedWriter(fw);
for (Id id : hImporter.getDBInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
LOG.info("Defined Table instances");
for (Id id : hImporter.getTableInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
LOG.info("Defined Partition instances");
for (Id id : hImporter.getPartitionInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
LOG.info("Defined Column instances");
for (Id id : hImporter.getColumnInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
LOG.info("Defined Index instances");
for (Id id : hImporter.getIndexInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
LOG.info("Defined Process instances");
for (Id id : hImporter.getProcessInstances()) {
ITypedReferenceableInstance instance = mr.get(id);
bw.write(instance.toString());
}
bw.flush();
bw.close();
}
@Test(enabled = true)
public void testHiveLineage() throws Exception {
Class.forName("org.apache.hive.jdbc.HiveDriver");
String url = "jdbc:hive2://" + hiveHost + ":" + hivePort;
Connection con = DriverManager.getConnection(url, "ambari-qa", "");
Statement stmt = con.createStatement();
stmt.execute("drop table if exists t");
stmt.execute("create table t(a int, b string)");
stmt.execute("drop table if exists t2");
stmt.execute("create table t2 as select * from t");
}
}
\ No newline at end of file
......@@ -68,7 +68,12 @@ public class MetadataServiceClient {
//Trait operations
ADD_TRAITS("api/metadata/traits/add", HttpMethod.POST),
DELETE_TRAITS("api/metadata/traits/delete", HttpMethod.PUT),
LIST_TRAITS("api/metadata/traits/list", HttpMethod.GET);
LIST_TRAITS("api/metadata/traits/list", HttpMethod.GET),
//Search operations
SEARCH("api/metadata/discovery/search", HttpMethod.GET),
SEARCH_DSL("api/metadata/discovery/search/dsl", HttpMethod.GET),
SEARCH_GREMLIN("api/metadata/discovery/search/gremlin", HttpMethod.GET);
private final String method;
private final String path;
......@@ -87,10 +92,6 @@ public class MetadataServiceClient {
}
}
public JSONObject createType(String typeAsJson) throws MetadataServiceException {
return callAPI(API.CREATE_TYPE, typeAsJson);
}
public List<String> listTypes() throws MetadataServiceException {
try {
final JSONObject jsonObject = callAPI(API.LIST_TYPES, null);
......@@ -106,10 +107,75 @@ public class MetadataServiceClient {
}
}
    /**
     * Registers the given type (meta model) with the metadata service
     * @param typeAsJson type definition as JSON
     * @return result json object
     * @throws MetadataServiceException
     */
public JSONObject createType(String typeAsJson) throws MetadataServiceException {
return callAPI(API.CREATE_TYPE, typeAsJson);
}
    /**
     * Creates the given entity
     * @param entityAsJson entity (type instance) as JSON
     * @return result json object
     * @throws MetadataServiceException
     */
public JSONObject createEntity(String entityAsJson) throws MetadataServiceException {
return callAPI(API.CREATE_ENTITY, entityAsJson);
}
/**
* Get an entity given the entity id
* @param guid entity id
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject getEntity(String guid) throws MetadataServiceException {
return callAPI(API.GET_ENTITY, null, guid);
}
    /**
     * Searches for entities of the given type by attribute name and value.
     * Currently issues a Gremlin query; this will switch to DSL search once it is available.
     * @param typeName name of the entity type
     * @param attributeName attribute name
     * @param attributeValue attribute value
     * @return result json object
     * @throws MetadataServiceException
     */
public JSONObject search(String typeName, String attributeName, Object attributeValue) throws MetadataServiceException {
//TODO replace with DSL when DSL works
String gremlinQuery = String.format("g.V.has(\"typeName\",\"%s\").and(_().has(\"%s.%s\", T.eq, \"%s\")).toList()",
typeName, typeName, attributeName, attributeValue);
return search(gremlinQuery);
}
    /**
     * Searches using the given DSL query
     * @param query DSL query
     * @return result json object
     * @throws MetadataServiceException
     */
public JSONObject searchByDSL(String query) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH_DSL);
resource = resource.queryParam("query", query);
return callAPIWithResource(API.SEARCH_DSL, resource);
}
    /**
     * Searches using the given Gremlin query
     * @param gremlinQuery Gremlin query
     * @return result json object
     * @throws MetadataServiceException
     */
public JSONObject search(String gremlinQuery) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH);
resource = resource.queryParam("query", gremlinQuery);
return callAPIWithResource(API.SEARCH, resource);
}
public String getRequestId(JSONObject json) throws MetadataServiceException {
try {
return json.getString(REQUEST_ID);
......@@ -118,17 +184,23 @@ public class MetadataServiceClient {
}
}
private JSONObject callAPI(API api, Object requestObject, String... pathParams) throws MetadataServiceException {
private WebResource getResource(API api, String... pathParams) {
WebResource resource = service.path(api.getPath());
if (pathParams != null) {
for (String pathParam : pathParams) {
resource = resource.path(pathParam);
}
}
return resource;
}
private JSONObject callAPIWithResource(API api, WebResource resource) throws MetadataServiceException {
return callAPIWithResource(api, resource, null);
}
ClientResponse clientResponse = resource
.accept(MediaType.APPLICATION_JSON)
.type(MediaType.APPLICATION_JSON)
private JSONObject callAPIWithResource(API api, WebResource resource, Object requestObject)
throws MetadataServiceException {
ClientResponse clientResponse = resource.accept(MediaType.APPLICATION_JSON).type(MediaType.APPLICATION_JSON)
.method(api.getMethod(), ClientResponse.class, requestObject);
if (clientResponse.getStatus() == Response.Status.OK.getStatusCode()) {
......@@ -141,4 +213,9 @@ public class MetadataServiceClient {
}
throw new MetadataServiceException(api, clientResponse.getClientResponseStatus());
}
private JSONObject callAPI(API api, Object requestObject, String... pathParams) throws MetadataServiceException {
WebResource resource = getResource(api, pathParams);
return callAPIWithResource(api, resource, requestObject);
}
}
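For orientation, a minimal usage sketch of the search methods added to the client above. It assumes a metadata server is running at http://localhost:21000/ (the address the integration tests use); the type name "hive_db" and the query strings are illustrative only, and DSL search is not expected to work until the TODO above is resolved:

    import org.apache.hadoop.metadata.MetadataServiceClient;
    import org.codehaus.jettison.json.JSONObject;

    public class SearchClientExample {
        public static void main(String[] args) throws Exception {
            MetadataServiceClient client = new MetadataServiceClient("http://localhost:21000/");

            // Attribute search - translated to a Gremlin query inside the client for now
            JSONObject byAttribute = client.search("hive_db", "name", "default");
            System.out.println(byAttribute);

            // Raw Gremlin query against the discovery search endpoint
            JSONObject byGremlin = client.search("g.V.has(\"typeName\", \"hive_db\").toList()");
            System.out.println(byGremlin);

            // DSL query against the DSL search endpoint
            JSONObject byDsl = client.searchByDSL("hive_db");
            System.out.println(byDsl);
        }
    }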
......@@ -206,6 +206,7 @@
<module>webapp</module>
<module>docs</module>
<module>tools</module>
<module>addons/hive-bridge</module>
</modules>
<repositories>
......@@ -267,18 +268,6 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-avatica</artifactId>
<version>0.9.2-incubating</version>
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<version>0.9.2-incubating</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jsp-2.1</artifactId>
<version>6.0.0</version>
......@@ -503,6 +492,12 @@
<!--<scope>test</scope>-->
</dependency>
<dependency>
<groupId>com.tinkerpop.gremlin</groupId>
<artifactId>gremlin-java</artifactId>
<version>${tinkerpop.version}</version>
</dependency>
<!-- metadata modules -->
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
......
......@@ -95,6 +95,11 @@
</dependency>
<dependency>
<groupId>com.tinkerpop.gremlin</groupId>
<artifactId>gremlin-java</artifactId>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
</dependency>
......
......@@ -36,7 +36,9 @@ public class Struct implements IStruct {
@InterfaceAudience.Private
public Struct(String typeName, Map<String, Object> values) {
this(typeName);
this.values.putAll(values);
if (values != null) {
this.values.putAll(values);
}
}
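    // With the null check above, a Struct can be constructed without initial values,
    // e.g. new Struct(typeName, null) (illustrative), rather than triggering the
    // NullPointerException that Map.putAll(null) would throw.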
@Override
......
......@@ -273,7 +273,7 @@
<artifactId>maven-jetty-plugin</artifactId>
<version>${jetty.version}</version>
<configuration>
<skip>${skipITs}</skip>
<skip>${skipTests}</skip>
<!--only skip int tests -->
<connectors>
<!--
......