Commit 7a8e847a by Venkat Ranganathan

HiveTypeSystem process changes and HiveHook using semantic analyzer hooks

parent 1d850208
......@@ -58,6 +58,26 @@
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>0.14.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.2</version>
......@@ -92,4 +112,4 @@
</dependency>
</dependencies>
</project>
\ No newline at end of file
</project>
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.hivetypes;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.util.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
* DgiHook sends lineage information to the DgiServer; a registration sketch follows this class.
*/
public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHook {
private static final Log LOG = LogFactory.getLog(HiveHook.class.getName());
private static ExecutorService executor;
private HiveTypeSystem hiveTypeSystem;
private static final String dgcDumpDir = "/tmp/dgcfiles";
// wait time determines how long we wait before we exit the jvm on
// shutdown. Pending requests after that will not be sent.
private static final int WAIT_TIME = 3;
Connection connection = null;
PreparedStatement insertStatement = null;
PreparedStatement updateStatement = null;
private static final String dbHost = "10.11.4.125";
private static final String url = "jdbc:postgresql://" + dbHost + "/dgctest";
private static final String user = "postgres";
private static final String password = "postgres";
private static final String insertQuery =
"insert into query_info(query_id, query_text, query_plan, start_time, user_name, query_graph) "
+ "values (?, ?, ?, ?, ?, ?";
private static final String updateQuery =
"update query_info set end_time = ? where query_id = ?";
static {
// Anything shared should be initialized here and destroyed in the
// shutdown hook. The hook contract is weird in that it creates a
// boatload of hooks.
// initialize the async facility to process hook calls. We don't
// want to do this inline since it adds plenty of overhead for the
// query.
executor = Executors.newSingleThreadExecutor(
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("DGI Logger %d")
.build());
try {
Runtime.getRuntime().addShutdownHook(
new Thread() {
@Override
public void run() {
try {
executor.shutdown();
executor.awaitTermination(WAIT_TIME, TimeUnit.SECONDS);
executor = null;
} catch (InterruptedException ie) {
LOG.info("Interrupt received in shutdown.");
}
// shutdown client
}
}
);
} catch (IllegalStateException is) {
LOG.info("Attempting to send msg while shutdown in progress.");
}
LOG.info("Created DgiHook");
}
public HiveHook() {
try {
File dgcDumpFile = new File(dgcDumpDir);
dgcDumpFile.mkdirs();
connection = DriverManager.getConnection(url, user, password);
insertStatement = connection.prepareStatement(insertQuery);
updateStatement = connection.prepareStatement(updateQuery);
}
catch (Exception e) {
LOG.error("Exception initializing HiveHook", e);
}
}
private class MySemanticAnalyzer extends BaseSemanticAnalyzer {
public MySemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
}
public void analyzeInternal(ASTNode ast) throws SemanticException {
throw new RuntimeException("Not implemented");
}
public void setInputs(HashSet<ReadEntity> inputs) {
this.inputs = inputs;
}
}
@Override
public void run(final HookContext hookContext) throws Exception {
if (executor == null) {
LOG.info("No executor running. Bail.");
return;
}
final long currentTime = System.currentTimeMillis();
// clone to avoid concurrent access
final HiveConf conf = new HiveConf(hookContext.getConf());
executor.submit(
new Runnable() {
@Override
public void run() {
try {
QueryPlan plan = hookContext.getQueryPlan();
if (plan == null) {
LOG.info("no plan in callback.");
return;
}
String queryId = plan.getQueryId();
long queryStartTime = plan.getQueryStartTime();
String user = hookContext.getUgi().getUserName();
String operationType = hookContext.getOperationName();
Set<WriteEntity> outputs = plan.getOutputs();
Set<ReadEntity> inputs = plan.getInputs();
switch (hookContext.getHookType()) {
case PRE_EXEC_HOOK: // command about to execute
ExplainTask explain = new ExplainTask();
explain.initialize(conf, plan, null);
String query = plan.getQueryStr();
List<Task<?>> rootTasks = plan.getRootTasks();
//We need to somehow get the sem associated with the plan and use it here.
//MySemanticAnalyzer sem = new MySemanticAnalyzer(conf);
//sem.setInputs(plan.getInputs());
//ExplainWork ew = new ExplainWork(null, null, rootTasks, plan.getFetchTask(), null, sem,
// false, true, false, false, false);
//JSONObject explainPlan =
// explain.getJSONLogicalPlan(null, ew);
String graph = "";
if (plan.getQuery().getStageGraph() != null) {
graph = plan.getQuery().getStageGraph().toString();
}
JSONObject explainPlan =
explain.getJSONPlan(null, null, rootTasks,
plan.getFetchTask(), true, false, false);
fireAndForget(conf,
createPreHookEvent(queryId, query,
explainPlan, queryStartTime,
user, inputs, outputs, graph));
break;
case POST_EXEC_HOOK: // command completed successfully
fireAndForget(conf, createPostHookEvent(queryId, currentTime, user,
true, inputs, outputs));
break;
case ON_FAILURE_HOOK: // command failed
fireAndForget(conf, createPostHookEvent(queryId, currentTime, user,
false, inputs, outputs));
break;
default:
//ignore
LOG.info("unknown hook type");
break;
}
} catch (Exception e) {
LOG.info("Failed to submit plan: " + StringUtils.stringifyException(e));
}
}
}
);
}
private void appendEntities(JSONObject obj, String key,
Set<? extends Entity> entities)
throws JSONException {
for (Entity e : entities) {
if (e != null) {
JSONObject entityObj = new JSONObject();
entityObj.put("type", e.getType().toString());
entityObj.put("name", e.toString());
obj.append(key, entityObj);
}
}
}
private JSONObject createPreHookEvent(String queryId, String query,
JSONObject explainPlan, long startTime, String user,
Set<ReadEntity> inputs, Set<WriteEntity> outputs, String graph)
throws JSONException {
JSONObject queryObj = new JSONObject();
queryObj.put("queryText", query);
queryObj.put("queryPlan", explainPlan);
queryObj.put("queryId", queryId);
queryObj.put("startTime", startTime);
queryObj.put("user", user);
queryObj.put("graph", graph);
appendEntities(queryObj, "inputs", inputs);
appendEntities(queryObj, "output", outputs);
LOG.info("Received pre-hook notification for :" + queryId);
if (LOG.isDebugEnabled()) {
LOG.debug("DGI Info: " + queryObj.toString(2));
}
return queryObj;
}
private JSONObject createPostHookEvent(String queryId, long stopTime,
String user, boolean success, Set<ReadEntity> inputs,
Set<WriteEntity> outputs)
throws JSONException {
JSONObject completionObj = new JSONObject();
completionObj.put("queryId", queryId);
completionObj.put("stopTime", stopTime);
completionObj.put("user", user);
completionObj.put("result", success);
appendEntities(completionObj, "inputs", inputs);
appendEntities(completionObj, "output", outputs);
LOG.info("Received post-hook notification for :" + queryId);
if (LOG.isDebugEnabled()) {
LOG.debug("DGI Info: " + completionObj.toString(2));
}
return completionObj;
}
private synchronized void fireAndForget(Configuration conf, JSONObject obj) throws Exception {
LOG.info("Submitting: " + obj.toString(2));
String queryId = (String)obj.get("queryId");
try {
BufferedWriter fw = new BufferedWriter(new FileWriter(new File(dgcDumpDir, queryId), true));
fw.write(obj.toString(2));
fw.flush();
fw.close();
}
catch (Exception e) {
LOG.error("Unable to log logical plan to file", e);
}
}
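    // A minimal sketch of a hypothetical helper (not wired in anywhere): the constructor prepares
    // insertStatement, but fireAndForget only dumps the event to a file. This shows how a pre-hook
    // event built by createPreHookEvent could be bound to that prepared insert, assuming the column
    // order declared in insertQuery (query_id, query_text, query_plan, start_time, user_name,
    // query_graph) and that start_time is stored as an epoch-millis bigint.
    private void persistPreHookEvent(JSONObject event) throws Exception {
        insertStatement.setString(1, event.getString("queryId"));
        insertStatement.setString(2, event.getString("queryText"));
        insertStatement.setString(3, event.getJSONObject("queryPlan").toString());
        insertStatement.setLong(4, event.getLong("startTime"));
        insertStatement.setString(5, event.getString("user"));
        insertStatement.setString(6, event.getString("graph"));
        insertStatement.executeUpdate();
    }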
private void analyzeHiveParseTree(ASTNode ast) {
String astStr = ast.dump();
Tree tab = ast.getChild(0);
String fullTableName;
boolean isExternal = false;
boolean isTemporary = false;
String inputFormat = null;
String outputFormat = null;
String serde = null;
String storageHandler = null;
String likeTableName = null;
String comment = null;
Tree ctasNode = null;
Tree rowFormatNode = null;
String location = null;
Map<String, String> serdeProps = new HashMap<>();
try {
BufferedWriter fw = new BufferedWriter(new FileWriter(new File(dgcDumpDir, "ASTDump"), true));
fw.write("Full AST Dump" + astStr);
switch(ast.getToken().getType()) {
case HiveParser.TOK_CREATETABLE:
if (tab.getType() != HiveParser.TOK_TABNAME ||
(tab.getChildCount() != 1 && tab.getChildCount() != 2)) {
LOG.error("Ignoring malformed Create table statement");
}
if (tab.getChildCount() == 2) {
String dbName = BaseSemanticAnalyzer.unescapeIdentifier(tab.getChild(0).getText());
String tableName = BaseSemanticAnalyzer.unescapeIdentifier(tab.getChild(1).getText());
fullTableName = dbName + "." + tableName;
}
else {
fullTableName = BaseSemanticAnalyzer.unescapeIdentifier(tab.getChild(0).getText());
}
LOG.info("Creating table " + fullTableName);
int numCh = ast.getChildCount();
for (int num = 1; num < numCh; num++) {
ASTNode child = (ASTNode) ast.getChild(num);
// Handle storage format
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEFILEFORMAT:
if (child.getChildCount() < 2) {
throw new SemanticException(
"Incomplete specification of File Format. " +
"You must provide InputFormat, OutputFormat.");
}
inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText());
outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(1).getText());
if (child.getChildCount() == 3) {
serde = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(2).getText());
}
break;
case HiveParser.TOK_STORAGEHANDLER:
storageHandler = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText());
if (child.getChildCount() == 2) {
BaseSemanticAnalyzer.readProps(
(ASTNode) (child.getChild(1).getChild(0)),
serdeProps);
}
break;
case HiveParser.TOK_FILEFORMAT_GENERIC:
ASTNode grandChild = (ASTNode) child.getChild(0);
String name = (grandChild == null ? "" : grandChild.getText()).trim().toUpperCase();
if (name.isEmpty()) {
LOG.error("File format in STORED AS clause is empty");
break;
}
break;
}
switch (child.getToken().getType()) {
case HiveParser.KW_EXTERNAL:
isExternal = true;
break;
case HiveParser.KW_TEMPORARY:
isTemporary = true;
break;
case HiveParser.TOK_LIKETABLE:
if (child.getChildCount() > 0) {
likeTableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) child.getChild(0));
}
break;
case HiveParser.TOK_QUERY:
ctasNode = child;
break;
case HiveParser.TOK_TABLECOMMENT:
comment = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPARTCOLS:
case HiveParser.TOK_TABCOLLIST:
case HiveParser.TOK_ALTERTABLE_BUCKETS:
break;
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatNode = child;
break;
case HiveParser.TOK_TABLELOCATION:
location = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLEPROPERTIES:
break;
case HiveParser.TOK_TABLESERIALIZER:
child = (ASTNode) child.getChild(0);
serde = BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText());
break;
case HiveParser.TOK_TABLESKEWED:
break;
case HiveParser.TOK_TABLEFILEFORMAT:
case HiveParser.TOK_STORAGEHANDLER:
case HiveParser.TOK_FILEFORMAT_GENERIC:
// already handled by the storage format switch above
break;
default:
throw new AssertionError("Unknown token: " + child.getToken());
}
}
StringBuilder sb = new StringBuilder(1024);
sb.append("Full table name: ").append(fullTableName).append('\n');
sb.append("\tisTemporary: ").append(isTemporary).append('\n');
sb.append("\tIsExternal: ").append(isExternal).append('\n');
if (inputFormat != null) {
sb.append("\tinputFormat: ").append(inputFormat).append('\n');
}
if (outputFormat != null) {
sb.append("\toutputFormat: ").append(outputFormat).append('\n');
}
if (serde != null) {
sb.append("\tserde: ").append(serde).append('\n');
}
if (storageHandler != null) {
sb.append("\tstorageHandler: ").append(storageHandler).append('\n');
}
if (likeTableName != null) {
sb.append("\tlikeTableName: ").append(likeTableName);
}
if (comment != null) {
sb.append("\tcomment: ").append(comment);
}
if (location != null) {
sb.append("\tlocation: ").append(location);
}
if (ctasNode != null) {
sb.append("\tctasNode: ").append(((ASTNode) ctasNode).dump());
}
if (rowFormatNode != null) {
sb.append("\trowFormatNode: ").append(((ASTNode)rowFormatNode).dump());
}
fw.write(sb.toString());
}
fw.flush();
fw.close();
}
catch (Exception e) {
LOG.error("Unable to log logical plan to file", e);
}
}
private void parseQuery(String sqlText) throws Exception {
ParseDriver parseDriver = new ParseDriver();
ASTNode node = parseDriver.parse(sqlText);
analyzeHiveParseTree(node);
}
/**
* This is an attempt to use the parser. Semantic issues are not handled here.
*
* Trying to recompile the query runs into some issues in the preExec
* hook, but we need to make sure all the semantic issues are handled. Maybe we should save the AST in the
* semantic analyzer and have it available in the preExec hook so that we can walk it freely.
*
* @param context
* @param ast
* @return
* @throws SemanticException
*/
@Override
public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) throws SemanticException {
analyzeHiveParseTree(ast);
return ast;
}
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<? extends Serializable>> rootTasks) throws SemanticException {
}
}
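A minimal registration sketch for the hook above, assuming the standard Hive hook properties
(hive.exec.pre.hooks, hive.exec.post.hooks, hive.exec.failure.hooks and hive.semantic.analyzer.hook);
in a real deployment these would normally be set in hive-site.xml rather than programmatically, and
the class below is illustrative only.

import org.apache.hadoop.hive.conf.HiveConf;

public class HiveHookRegistrationSketch {
    public static void main(String[] args) {
        String hook = "org.apache.hadoop.metadata.hivetypes.HiveHook";
        HiveConf conf = new HiveConf();
        // Execution hooks drive run(HookContext) for PRE_EXEC_HOOK, POST_EXEC_HOOK and ON_FAILURE_HOOK.
        conf.set("hive.exec.pre.hooks", hook);
        conf.set("hive.exec.post.hooks", hook);
        conf.set("hive.exec.failure.hooks", hook);
        // The semantic analyzer hook drives preAnalyze/postAnalyze on the parsed AST.
        conf.set("hive.semantic.analyzer.hook", hook);
        System.out.println("Hook registered as: " + conf.get("hive.exec.pre.hooks"));
    }
}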
......@@ -18,6 +18,7 @@
package org.apache.hadoop.metadata.hivetypes;
import org.apache.hadoop.hive.cli.CliDriver;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
......@@ -28,6 +29,7 @@ import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.service.HiveClient;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.ITypedStruct;
import org.apache.hadoop.metadata.MetadataException;
......@@ -64,7 +66,6 @@ public class HiveImporter {
private List<Id> processInstances;
private class Pair<L, R> {
final L left;
final R right;
......@@ -73,8 +74,14 @@ public class HiveImporter {
this.left = left;
this.right = right;
}
public L left() { return this.left;}
public R right() { return this.right;}
public L left() {
return this.left;
}
public R right() {
return this.right;
}
}
......@@ -84,7 +91,8 @@ public class HiveImporter {
}
}
public HiveImporter(MetadataRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc) throws RepositoryException {
public HiveImporter( MetadataRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc)
throws RepositoryException {
this(hts, hmc);
if (repo == null) {
......@@ -96,7 +104,8 @@ public class HiveImporter {
}
public HiveImporter(IRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc) throws RepositoryException {
public HiveImporter(IRepository repo, HiveTypeSystem hts, HiveMetaStoreClient hmc)
throws RepositoryException {
this(hts, hmc);
if (repo == null) {
......@@ -158,6 +167,9 @@ public class HiveImporter {
}
}
public void importHiveRTInfo(String stmt) throws MetadataException {
}
private boolean usingMemRepository() {
return this.graphRepository == null;
......@@ -182,7 +194,7 @@ public class HiveImporter {
}
private void setReferenceInstanceAttribute(Referenceable ref, String attr,
InstancePair instance) {
InstancePair instance) {
if (usingMemRepository()) {
ref.set(attr, instance.left());
} else {
......@@ -218,112 +230,164 @@ public class HiveImporter {
List<String> hiveTables = hiveMetastoreClient.getAllTables(db);
for (String table : hiveTables) {
LOG.info("Importing objects from " + db + "." + table);
Table hiveTable = hiveMetastoreClient.getTable(db, table);
Referenceable tableRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_TABLE.name());
setReferenceInstanceAttribute(tableRef, "dbName", dbRefTyped);
tableRef.set("tableName", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
tableRef.set("createTime", hiveTable.getCreateTime());
tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
tableRef.set("retention", hiveTable.getRetention());
StorageDescriptor storageDesc = hiveTable.getSd();
InstancePair sdRefTyped = fillStorageDescStruct(storageDesc);
setReferenceInstanceAttribute(tableRef, "sd", sdRefTyped);
List<InstancePair> partKeys = new ArrayList<>();
Referenceable colRef;
if (hiveTable.getPartitionKeysSize() > 0) {
for (FieldSchema fs : hiveTable.getPartitionKeys()) {
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
InstancePair colRefTyped = createInstance(colRef);
partKeys.add(colRefTyped);
importTable(db, table, dbRefTyped);
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importTable(String db, String table, InstancePair dbRefTyped) throws MetadataException {
try {
LOG.info("Importing objects from " + db + "." + table);
Table hiveTable = hiveMetastoreClient.getTable(db, table);
Referenceable tableRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_TABLE.name());
setReferenceInstanceAttribute(tableRef, "dbName", dbRefTyped);
tableRef.set("tableName", hiveTable.getTableName());
tableRef.set("owner", hiveTable.getOwner());
tableRef.set("createTime", hiveTable.getCreateTime());
tableRef.set("lastAccessTime", hiveTable.getLastAccessTime());
tableRef.set("retention", hiveTable.getRetention());
StorageDescriptor storageDesc = hiveTable.getSd();
InstancePair sdRefTyped = fillStorageDescStruct(storageDesc);
setReferenceInstanceAttribute(tableRef, "sd", sdRefTyped);
List<InstancePair> partKeys = new ArrayList<>();
Referenceable colRef;
if (hiveTable.getPartitionKeysSize() > 0) {
for (FieldSchema fs : hiveTable.getPartitionKeys()) {
colRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_COLUMN.name());
colRef.set("name", fs.getName());
colRef.set("type", fs.getType());
colRef.set("comment", fs.getComment());
InstancePair colRefTyped = createInstance(colRef);
partKeys.add(colRefTyped);
}
if (usingMemRepository()) {
List<ITypedReferenceableInstance> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.left());
}
if (usingMemRepository()) {
List<ITypedReferenceableInstance> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.left());
}
tableRef.set("partitionKeys", keys);
} else {
List<Referenceable> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.right());
}
tableRef.set("partitionKeys", keys);
tableRef.set("partitionKeys", keys);
} else {
List<Referenceable> keys = new ArrayList<>();
for (InstancePair ip : partKeys) {
keys.add(ip.right());
}
tableRef.set("partitionKeys", keys);
}
tableRef.set("parameters", hiveTable.getParameters());
if (hiveTable.isSetViewOriginalText()) {
tableRef.set("viewOriginalText", hiveTable.getViewOriginalText());
}
if (hiveTable.isSetViewExpandedText()) {
tableRef.set("viewExpandedText", hiveTable.getViewExpandedText());
}
tableRef.set("tableType", hiveTable.getTableType());
tableRef.set("temporary", hiveTable.isTemporary());
}
tableRef.set("parameters", hiveTable.getParameters());
if (hiveTable.isSetViewOriginalText()) {
tableRef.set("viewOriginalText", hiveTable.getViewOriginalText());
}
if (hiveTable.isSetViewExpandedText()) {
tableRef.set("viewExpandedText", hiveTable.getViewExpandedText());
}
tableRef.set("tableType", hiveTable.getTableType());
tableRef.set("temporary", hiveTable.isTemporary());
InstancePair tableRefTyped = createInstance(tableRef);
if (usingMemRepository()) {
tableInstances.add(tableRefTyped.left().getId());
InstancePair tableRefTyped = createInstance(tableRef);
if (usingMemRepository()) {
tableInstances.add(tableRefTyped.left().getId());
}
importPartitions(db, table, dbRefTyped, tableRefTyped, sdRefTyped);
importIndexes(db, table, dbRefTyped, tableRef);
} catch (Exception e) {
throw new MetadataException(e);
}
List<Partition> tableParts = hiveMetastoreClient.listPartitions(db, table, Short.MAX_VALUE);
hiveMetastoreClient.listPartitionSpecs(db, table, Short.MAX_VALUE);
if (tableParts.size() > 0) {
for (Partition hivePart : tableParts) {
Referenceable partRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_PARTITION.name());
partRef.set("values", hivePart.getValues());
setReferenceInstanceAttribute(partRef, "dbName", dbRefTyped);
setReferenceInstanceAttribute(partRef, "tableName", tableRefTyped);
partRef.set("createTime", hivePart.getCreateTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
//sdStruct = fillStorageDescStruct(hivePart.getSd());
// Instead of creating copies of the sd struct for partitions we are reusing existing ones;
// will fix to identify partitions with differing schemas.
setReferenceInstanceAttribute(partRef, "sd", sdRefTyped);
partRef.set("parameters", hivePart.getParameters());
InstancePair partRefTyped = createInstance(partRef);
if (usingMemRepository()) {
partitionInstances.add(partRefTyped.left().getId());
}
}
}
private void importPartitions(String db, String table, InstancePair dbRefTyped,
InstancePair tableRefTyped, InstancePair sdRefTyped)
throws MetadataException {
try {
List<Partition> tableParts = hiveMetastoreClient.listPartitions(db, table, Short.MAX_VALUE);
if (tableParts.size() > 0) {
for (Partition hivePart : tableParts) {
importPartition(hivePart, dbRefTyped, tableRefTyped, sdRefTyped);
}
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
List<Index> indexes = hiveMetastoreClient.listIndexes(db, table, Short.MAX_VALUE);
if (indexes.size() > 0 ) {
for (Index index : indexes) {
Referenceable indexRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_INDEX.name());
indexRef.set("indexName", index.getIndexName());
indexRef.set("indexHandlerClass", index.getIndexHandlerClass());
setReferenceInstanceAttribute(indexRef, "dbName", dbRefTyped);
indexRef.set("createTime", index.getCreateTime());
indexRef.set("lastAccessTime", index.getLastAccessTime());
indexRef.set("origTableName", index.getOrigTableName());
indexRef.set("indexTableName", index.getIndexTableName());
sdRefTyped = fillStorageDescStruct(index.getSd());
setReferenceInstanceAttribute(indexRef, "sd", sdRefTyped);
indexRef.set("parameters", index.getParameters());
tableRef.set("deferredRebuild", index.isDeferredRebuild());
InstancePair indexRefTyped = createInstance(indexRef);
if (usingMemRepository()) {
indexInstances.add(indexRefTyped.left().getId());
}
}
private void importPartition(Partition hivePart,
InstancePair dbRefTyped, InstancePair tableRefTyped, InstancePair sdRefTyped)
throws MetadataException {
try {
Referenceable partRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_PARTITION.name());
partRef.set("values", hivePart.getValues());
setReferenceInstanceAttribute(partRef, "dbName", dbRefTyped);
setReferenceInstanceAttribute(partRef, "tableName", tableRefTyped);
partRef.set("createTime", hivePart.getCreateTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
//sdStruct = fillStorageDescStruct(hivePart.getSd());
// Instead of creating copies of the sd struct for partitions we are reusing existing ones;
// will fix to identify partitions with differing schemas.
setReferenceInstanceAttribute(partRef, "sd", sdRefTyped);
partRef.set("parameters", hivePart.getParameters());
InstancePair partRefTyped = createInstance(partRef);
if (usingMemRepository()) {
partitionInstances.add(partRefTyped.left().getId());
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importIndexes(String db, String table, InstancePair dbRefTyped, Referenceable tableRef)
throws MetadataException {
try {
List<Index> indexes = hiveMetastoreClient.listIndexes(db, table, Short.MAX_VALUE);
if (indexes.size() > 0) {
for (Index index : indexes) {
importIndex(index, dbRefTyped, tableRef);
}
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private void importIndex(Index index,
InstancePair dbRefTyped, Referenceable tableRef)
throws MetadataException {
try {
Referenceable indexRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_INDEX.name());
indexRef.set("indexName", index.getIndexName());
indexRef.set("indexHandlerClass", index.getIndexHandlerClass());
setReferenceInstanceAttribute(indexRef, "dbName", dbRefTyped);
indexRef.set("createTime", index.getCreateTime());
indexRef.set("lastAccessTime", index.getLastAccessTime());
indexRef.set("origTableName", index.getOrigTableName());
indexRef.set("indexTableName", index.getIndexTableName());
InstancePair sdRefTyped = fillStorageDescStruct(index.getSd());
setReferenceInstanceAttribute(indexRef, "sd", sdRefTyped);
indexRef.set("parameters", index.getParameters());
tableRef.set("deferredRebuild", index.isDeferredRebuild());
InstancePair indexRefTyped = createInstance(indexRef);
if (usingMemRepository()) {
indexInstances.add(indexRefTyped.left().getId());
}
} catch (Exception e) {
throw new MetadataException(e);
}
}
private InstancePair fillStorageDescStruct(StorageDescriptor storageDesc) throws Exception {
......@@ -344,7 +408,7 @@ public class HiveImporter {
LOG.debug("serdeInfo = " + serdeInfo);
StructType serdeInfotype = (StructType) hiveTypeSystem.getDataType(serdeInfoName);
StructType serdeInfotype = (StructType) hiveTypeSystem.getDataType(serdeInfoName);
ITypedStruct serdeInfoStructTyped =
serdeInfotype.convert(serdeInfoStruct, Multiplicity.OPTIONAL);
......@@ -367,7 +431,6 @@ public class HiveImporter {
//}
List<InstancePair> fieldsList = new ArrayList<>();
Referenceable colRef;
......
......@@ -511,7 +511,10 @@ public class HiveTypeSystem {
new AttributeDefinition("userName", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("sourceTableNames", String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("targetTableNames", String.format("array<%s>", DefinedTypes.HIVE_TABLE.name()), Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("jobDefinition", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryText", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryPlan", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryId", DataTypes.STRING_TYPE.getName(), Multiplicity.REQUIRED, false, null),
new AttributeDefinition("queryGraph", DataTypes.STRING_TYPE.getName(), Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class, DefinedTypes.HIVE_PROCESS.name(),
......
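A hedged sketch of how the reworked HIVE_PROCESS attributes above might be populated, mirroring the
Referenceable usage already present in HiveImporter; the method name and its arguments are hypothetical
and stand in for the locals computed in HiveHook.run(), and the Referenceable import is assumed to match
the one HiveImporter uses.

    // Illustrative only (not part of this change); assumes Referenceable is imported as in HiveImporter.
    private Referenceable buildProcessReferenceable(String user, String queryText, String queryId,
                                                    JSONObject explainPlan, String graph) {
        Referenceable processRef = new Referenceable(HiveTypeSystem.DefinedTypes.HIVE_PROCESS.name());
        processRef.set("userName", user);                    // REQUIRED
        processRef.set("queryText", queryText);              // REQUIRED, replaces the old jobDefinition
        processRef.set("queryPlan", explainPlan.toString()); // REQUIRED, stored as STRING_TYPE
        processRef.set("queryId", queryId);                  // REQUIRED
        processRef.set("queryGraph", graph);                 // OPTIONAL
        // sourceTableNames / targetTableNames remain OPTIONAL arrays of HIVE_TABLE references.
        return processRef;
    }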
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://c6501.ambari.apache.org:8020</value>
<final>true</final>
</property>
<property>
<name>fs.trash.interval</name>
<value>360</value>
</property>
<property>
<name>hadoop.http.authentication.simple.anonymous.allowed</name>
<value>true</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hcat.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>users</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>
RULE:[2:$1@$0]([rn]m@.*)s/.*/yarn/
RULE:[2:$1@$0](jhs@.*)s/.*/mapred/
RULE:[2:$1@$0]([nd]n@.*)s/.*/hdfs/
RULE:[2:$1@$0](hm@.*)s/.*/hbase/
RULE:[2:$1@$0](rs@.*)s/.*/hbase/
DEFAULT</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>false</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>io.serializations</name>
<value>org.apache.hadoop.io.serializer.WritableSerialization</value>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>50</value>
</property>
<property>
<name>ipc.client.connection.maxidletime</name>
<value>30000</value>
</property>
<property>
<name>ipc.client.idlethreshold</name>
<value>8000</value>
</property>
<property>
<name>ipc.server.tcpnodelay</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobtracker.webinterface.trusted</name>
<value>false</value>
</property>
<property>
<name>proxyuser_group</name>
<value>users</value>
</property>
</configuration>
<!--Wed Feb 4 03:23:35 2015-->
<configuration>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.blockreport.initialDelay</name>
<value>120</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.size</name>
<value>4096</value>
</property>
<property>
<name>dfs.cluster.administrators</name>
<value> hdfs</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
</property>
<property>
<name>dfs.datanode.balance.bandwidthPerSec</name>
<value>6250000</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>750</value>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>1073741824</value>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value>0</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
</property>
<property>
<name>dfs.datanode.https.address</name>
<value>0.0.0.0:50475</value>
</property>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:8010</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>4096</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
<property>
<name>dfs.heartbeat.interval</name>
<value>3</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/etc/hadoop/conf/dfs.exclude</value>
</property>
<property>
<name>dfs.http.policy</name>
<value>HTTP_ONLY</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/hadoop/hdfs/journalnode</value>
</property>
<property>
<name>dfs.journalnode.http-address</name>
<value>0.0.0.0:8480</value>
</property>
<property>
<name>dfs.namenode.accesstime.precision</name>
<value>0</value>
</property>
<property>
<name>dfs.namenode.avoid.read.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.avoid.write.stale.datanode</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>/hadoop/hdfs/namesecondary</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>${dfs.namenode.checkpoint.dir}</value>
</property>
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>21600</value>
</property>
<property>
<name>dfs.namenode.checkpoint.txns</name>
<value>1000000</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>40</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>c6501.ambari.apache.org:50070</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.https-address</name>
<value>c6501.ambari.apache.org:50470</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.name.dir.restore</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.safemode.threshold-pct</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>c6501.ambari.apache.org:50090</value>
</property>
<property>
<name>dfs.namenode.stale.datanode.interval</name>
<value>30000</value>
</property>
<property>
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
<value>3600</value>
</property>
<property>
<name>dfs.namenode.write.stale.datanode.ratio</name>
<value>1.0f</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hdfs</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.replication.max</name>
<value>50</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
<final>true</final>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>fs.permissions.umask-mode</name>
<value>022</value>
</property>
</configuration>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--Mon Apr 21 07:04:34 2014-->
<!--Wed Feb 4 03:19:28 2015-->
<configuration>
<property>
<name>hive.enforce.sorting</name>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
</property>
<property>
<name>datanucleus.cache.level2.type</name>
<value>none</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<value>true</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>250</value>
<name>hive.auto.convert.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>238026752</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.noconditionaltask</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
<value>false</value>
</property>
<property>
<name>hive.cbo.enable</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.header</name>
<value>false</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.class</name>
<value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>hive.cluster.delegation.token.store.zookeeper.znode</name>
<value>/hive/cluster/delegation</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
</property>
......@@ -40,43 +97,58 @@
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx200m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC</value>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<name>hive.compute.query.using.stats</name>
<value>true</value>
</property>
<property>
<name>hive.conf.restricted.list</name>
<value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
</property>
<property>
<name>hive.convert.join.bucket.mapjoin.tez</name>
<value>false</value>
</property>
<property>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>fs.file.impl.disable.cache</name>
<name>hive.enforce.sorting</name>
<value>true</value>
</property>
<property>
<name>hive.compute.query.using.stats</name>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
<name>hive.exec.compress.intermediate</name>
<value>false</value>
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask</name>
<name>hive.exec.compress.output</name>
<value>false</value>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.timeout</name>
<value>86400L</value>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
......@@ -85,18 +157,43 @@
</property>
<property>
<name>ambari.hive.db.schema.name</name>
<value>hive</value>
<name>hive.exec.max.created.files</name>
<value>100000</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<name>hive.exec.max.dynamic.partitions</name>
<value>5000</value>
</property>
<property>
<name>hive.exec.max.dynamic.partitions.pernode</name>
<value>2000</value>
</property>
<property>
<name>hive.exec.orc.compression.strategy</name>
<value>SPEED</value>
</property>
<property>
<name>hive.exec.orc.default.compress</name>
<value>ZLIB</value>
</property>
<property>
<name>hive.exec.orc.default.stripe.size</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>false</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>true</value>
<name>hive.exec.parallel.thread.number</name>
<value>8</value>
</property>
<property>
......@@ -105,34 +202,88 @@
</property>
<property>
<name>hive.stats.autogather</name>
<name>hive.exec.pre.hooks</name>
<value>org.apache.hadoop.hive.ql.hooks.ATSHook</value>
</property>
<property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>67108864</value>
</property>
<property>
<name>hive.exec.reducers.max</name>
<value>1009</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive</value>
</property>
<property>
<name>hive.exec.submit.local.task.via.child</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication</name>
<name>hive.exec.submitviachild</name>
<value>false</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
</property>
<property>
<name>hive.fetch.task.aggr</name>
<value>false</value>
</property>
<property>
<name>hive.fetch.task.conversion</name>
<value>more</value>
</property>
<property>
<name>hive.fetch.task.conversion.threshold</name>
<value>1073741824</value>
</property>
<property>
<name>hive.heapsize</name>
<value>1024</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.bucketmapjoin</name>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
</property>
<property>
<name>hive.map.aggr</name>
<value>true</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
<name>hive.map.aggr.hash.force.flush.memory.threshold</name>
<value>0.9</value>
</property>
<property>
<name>hive.metastore.uris</name>
<!-- <value>thrift://10.10.11.207:9083</value> -->
<value>thrift://localhost:9083</value>
<name>hive.map.aggr.hash.min.reduction</name>
<value>0.5</value>
</property>
<property>
<name>hive.enforce.sortmergebucketmapjoin</name>
<value>true</value>
<name>hive.map.aggr.hash.percentmemory</name>
<value>0.5</value>
</property>
<property>
......@@ -141,43 +292,113 @@
</property>
<property>
<name>hive.auto.convert.join.noconditionaltask.size</name>
<value>357564416</value>
<name>hive.mapjoin.optimized.hashtable</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.merge.mapfiles</name>
<value>true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<name>hive.merge.mapredfiles</name>
<value>false</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>mr</value>
<name>hive.merge.orcfile.stripe.level</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.delta.num.threshold</name>
<value>10</value>
<name>hive.merge.rcfile.block.level</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.index.filter</name>
<name>hive.merge.size.per.task</name>
<value>256000000</value>
</property>
<property>
<name>hive.merge.smallfiles.avgsize</name>
<value>16000000</value>
</property>
<property>
<name>hive.merge.tezfiles</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.authorization.storage.checks</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
</property>
<property>
<name>hive.metastore.client.connect.retry.delay</name>
<value>5s</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>1800s</value>
</property>
<property>
<name>hive.metastore.connect.retries</name>
<value>24</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<value>true</value>
</property>
<property>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
<name>hive.metastore.failure.retries</name>
<value>24</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
<name>hive.metastore.kerberos.keytab.file</name>
<value>/etc/security/keytabs/hive.service.keytab</value>
</property>
<property>
<name>hive.metastore.kerberos.principal</name>
<value>hive/_HOST@EXAMPLE.COM</value>
</property>
<property>
<name>hive.metastore.pre.event.listeners</name>
<value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
</property>
<property>
<name>hive.metastore.sasl.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.server.max.threads</name>
<value>100000</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://c6501.ambari.apache.org:9083</value>
</property>
<property>
......@@ -186,143 +407,303 @@
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://sandbox.hortonworks.com/hive?createDatabaseIfNotExist=true</value>
<name>hive.optimize.bucketmapjoin</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.client.socket.timeout</name>
<value>60</value>
<name>hive.optimize.bucketmapjoin.sortedmerge</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>1.0</value>
<name>hive.optimize.constant.propagation</name>
<value>true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<name>hive.optimize.index.filter</name>
<value>true</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
<name>hive.optimize.mapjoin.mapreduce</name>
<value>true</value>
</property>
<property>
<name>hive.semantic.analyzer.factory.impl</name>
<value>org.apache.hivealog.cli.HCatSemanticAnalyzerFactory</value>
<name>hive.optimize.metadataonly</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.join</name>
<name>hive.optimize.null.scan</name>
<value>true</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
<name>hive.optimize.reducededuplication</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
<name>hive.optimize.reducededuplication.min.reducer</name>
<value>4</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<name>hive.optimize.sort.dynamic.partition</name>
<value>false</value>
</property>
<property>
<name>hive.orc.compute.splits.num.threads</name>
<value>10</value>
</property>
<property>
<name>hive.orc.splits.include.file.footer</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.prewarm.numcontainers</name>
<value>10</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
</property>
<property>
<name>hive.compactor.abortedtxn.threshold</name>
<value>1000</value>
<name>hive.security.authorization.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>false</value>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
</property>
<property>
<name>hive.metastore.cache.pinobjtypes</name>
<value>Table,Database,Type,FieldSchema,Order</value>
<name>hive.security.metastore.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join</name>
<name>hive.security.metastore.authorization.auth.reads</name>
<value>true</value>
</property>
<property>
<name>hive.limit.pushdown.memory.usage</name>
<value>0.04</value>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider,org.apache.hadoop.hive.ql.security.authorization.MetaStoreAuthzAPIAuthorizerEmbedOnly</value>
</property>
<property>
<name>fs.hdfs.impl.disable.cache</name>
<name>hive.server2.allow.user.substitution</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.check.interval</name>
<value>300L</value>
<name>hive.server2.authentication</name>
<value>NONE</value>
</property>
<property>
<name>hive.map.aggr</name>
<name>hive.server2.authentication.spnego.keytab</name>
<value>HTTP/_HOST@EXAMPLE.COM</value>
</property>
<property>
<name>hive.server2.authentication.spnego.principal</name>
<value>/etc/security/keytabs/spnego.service.keytab</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.delta.pct.threshold</name>
<value>0.1f</value>
<name>hive.server2.enable.impersonation</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>1024</value>
<name>hive.server2.logging.operation.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.execute.setugi</name>
<name>hive.server2.logging.operation.log.location</name>
<value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>
<property>
<name>hive.auto.convert.sortmerge.join.noconditionaltask</name>
<name>hive.server2.table.type.mapping</name>
<value>CLASSIC</value>
</property>
<property>
<name>hive.server2.tez.default.queues</name>
<value>default</value>
</property>
<property>
<name>hive.server2.tez.initialize.default.sessions</name>
<value>false</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>500</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.sasl.qop</name>
<value>auth</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>binary</value>
</property>
<property>
<name>hive.server2.use.SSL</name>
<value>false</value>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2</value>
</property>
<property>
<name>hive.smbjoin.cache.rows</name>
<value>10000</value>
</property>
<property>
<name>hive.stats.autogather</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>1024</value>
<name>hive.stats.dbclass</name>
<value>fs</value>
</property>
<property>
<name>hive.security.metastore.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
<name>hive.stats.fetch.column.stats</name>
<value>false</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<name>hive.stats.fetch.partition.stats</name>
<value>true</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>false</value>
</property>
<property>
<name>hive.optimize.mapjoin.mapreduce</name>
<name>hive.tez.auto.reducer.parallelism</name>
<value>false</value>
</property>
<property>
<name>hive.tez.container.size</name>
<value>682</value>
</property>
<property>
<name>hive.tez.cpu.vcores</name>
<value>-1</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning</name>
<value>true</value>
</property>
<property>
<name>hive.server2.tez.sessions.per.default.queue</name>
<value>1</value>
<name>hive.tez.dynamic.partition.pruning.max.data.size</name>
<value>104857600</value>
</property>
<property>
<name>hive.tez.dynamic.partition.pruning.max.event.size</name>
<value>1048576</value>
</property>
<property>
<name>hive.tez.input.format</name>
<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
</property>
<property>
<name>hive.tez.java.opts</name>
<value>-server -Xmx546m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps</value>
</property>
<property>
<name>hive.tez.log.level</name>
<value>INFO</value>
</property>
<property>
<name>hive.tez.max.partition.factor</name>
<value>2.0</value>
</property>
<property>
<name>hive.tez.min.partition.factor</name>
<value>0.25</value>
</property>
<property>
<name>hive.tez.smb.number.waves</name>
<value>0.5</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager</value>
</property>
<property>
......@@ -330,4 +711,69 @@
<value>1000</value>
</property>
<property>
<name>hive.txn.timeout</name>
<value>300</value>
</property>
<property>
<name>hive.user.install.directory</name>
<value>/user/</value>
</property>
<property>
<name>hive.vectorized.execution.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.vectorized.execution.reduce.enabled</name>
<value>false</value>
</property>
<property>
<name>hive.vectorized.groupby.checkinterval</name>
<value>4096</value>
</property>
<property>
<name>hive.vectorized.groupby.flush.percent</name>
<value>0.1</value>
</property>
<property>
<name>hive.vectorized.groupby.maxentries</name>
<value>100000</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.zookeeper.namespace</name>
<value>hive_zookeeper_namespace</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://c6501.ambari.apache.org/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
</configuration>
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.logger.org.apache.hadoop.metadata=DEBUG, console
log4j.logger.org.apache.hadoop=DEBUG, console
log4j.logger.org.apache.hive=INFO, console
log4j.logger.org.apache.hcatalog=INFO, console
metadata.root.logger=DEBUG,console,DRFA
hive.root.logger=INFO,console,DRFA
hcatalog.root.logger=INFO,console,DRFA
metadata.log.dir=${user.dir}/metadata/logs
metadata.log.file=metadata.log
log4j.rootLogger=${metadata.root.logger}
#
# DRFA
# Daily Rolling File Appender
#
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${metadata.log.dir}/${metadata.log.file}
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFA.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
#
# console
# Add "console" to rootlogger above if you want to use this
#
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d (%t) [%p - %l] %m%n
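With the properties above, the metadata packages log at DEBUG and output goes both to the console appender (stderr) and to the daily rolling file under ${user.dir}/metadata/logs. A tiny smoke-test sketch that exercises this configuration when the file is on the classpath as log4j.properties; the class name and messages are illustrative only.

import org.apache.log4j.Logger;

public class LoggingSmokeTest {
    // Logger name chosen to fall under the org.apache.hadoop.metadata package rule above.
    private static final Logger LOG =
            Logger.getLogger("org.apache.hadoop.metadata.LoggingSmokeTest");

    public static void main(String[] args) {
        LOG.debug("goes to the console (stderr) and to metadata.log via the DRFA appender");
        LOG.info("also emitted: INFO is above the DEBUG threshold");
    }
}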
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>mapreduce.admin.map.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>mapreduce.admin.reduce.child.java.opts</name>
<value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>mapreduce.admin.user.env</name>
<value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
</property>
<property>
<name>mapreduce.am.max-attempts</name>
<value>2</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure</value>
</property>
<property>
<name>mapreduce.application.framework.path</name>
<value>/hdp/apps/${hdp.version}/mapreduce/mapreduce.tar.gz#mr-framework</value>
</property>
<property>
<name>mapreduce.cluster.administrators</name>
<value> hadoop</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.job.emit-timeline-data</name>
<value>false</value>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>0.05</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>c6501.ambari.apache.org:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/mr-history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/mr-history/tmp</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>c6501.ambari.apache.org:19888</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.map.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.map.sort.spill.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress</name>
<value>false</value>
</property>
<property>
<name>mapreduce.output.fileoutputformat.compress.type</name>
<value>BLOCK</value>
</property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.0</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx546m</value>
</property>
<property>
<name>mapreduce.reduce.log.level</name>
<value>INFO</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>682</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.enabled</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.interval-ms</name>
<value>1000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.fetch.retry.timeout-ms</name>
<value>30000</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.7</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.merge.percent</name>
<value>0.66</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.parallelcopies</name>
<value>30</value>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>13562</value>
</property>
<property>
<name>mapreduce.task.io.sort.factor</name>
<value>100</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>273</value>
</property>
<property>
<name>mapreduce.task.timeout</name>
<value>300000</value>
</property>
<property>
<name>yarn.app.mapreduce.am.admin-command-opts</name>
<value>-Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.command-opts</name>
<value>-Xmx546m -Dhdp.version=${hdp.version}</value>
</property>
<property>
<name>yarn.app.mapreduce.am.log.level</name>
<value>INFO</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>682</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
</configuration>
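One pattern worth noting in the MapReduce settings above: the task and AM heaps (-Xmx546m in mapreduce.map.java.opts, mapreduce.reduce.java.opts and yarn.app.mapreduce.am.command-opts) are roughly 80% of the 682 MB container sizes, leaving headroom for non-heap JVM memory. A small illustrative helper follows; the names are hypothetical and the 0.8 factor is the conventional Ambari sizing default, not something this commit sets.

public final class ContainerHeapSketch {

    private ContainerHeapSketch() { }

    // Conventional sizing: JVM heap ~80% of the YARN container.
    // 682 MB * 0.8 rounds to 546 MB, matching the -Xmx546m values above.
    static String heapOpts(int containerMb) {
        long heapMb = Math.round(containerMb * 0.8);
        return "-Xmx" + heapMb + "m";
    }

    public static void main(String[] args) {
        System.out.println(heapOpts(682)); // prints -Xmx546m
    }
}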
<!--Wed Feb 4 03:23:58 2015-->
<configuration>
<property>
<name>hadoop.registry.rm.enabled</name>
<value>true</value>
</property>
<property>
<name>hadoop.registry.zk.quorum</name>
<value>c6501.ambari.apache.org:2181</value>
</property>
<property>
<name>yarn.acl.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.admin.acl</name>
<value></value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR,/usr/hdp/current/hadoop-client/*,/usr/hdp/current/hadoop-client/lib/*,/usr/hdp/current/hadoop-hdfs-client/*,/usr/hdp/current/hadoop-hdfs-client/lib/*,/usr/hdp/current/hadoop-yarn-client/*,/usr/hdp/current/hadoop-yarn-client/lib/*</value>
</property>
<property>
<name>yarn.client.nodemanager-connect.max-wait-ms</name>
<value>900000</value>
</property>
<property>
<name>yarn.client.nodemanager-connect.retry-interval-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>2592000</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://c6501.ambari.apache.org:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.node-labels.fs-store.retry-policy-spec</name>
<value>2000, 500</value>
</property>
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>/system/yarn/node-labels</value>
</property>
<property>
<name>yarn.node-labels.manager-class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.nodelabels.MemoryRMNodeLabelsManager</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:45454</value>
</property>
<property>
<name>yarn.nodemanager.admin-env</name>
<value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.nodemanager.container-executor.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value>
</property>
<property>
<name>yarn.nodemanager.container-monitor.interval-ms</name>
<value>3000</value>
</property>
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>0</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
<value>90</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
<value>1000</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
<value>0.25</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.interval-ms</name>
<value>135000</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.script.timeout-ms</name>
<value>60000</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
<value>hadoop-yarn</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.group</name>
<value>hadoop</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/hadoop/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.compression-type</name>
<value>gz</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.debug-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.num-log-files-per-app</name>
<value>30</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
<value>-1</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/hadoop/yarn/log</value>
</property>
<property>
<name>yarn.nodemanager.log.retain-second</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.recovery.dir</name>
<value>/var/log/hadoop-yarn/nodemanager/recovery-state</value>
</property>
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/app-logs</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
<value>100</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>c6501.ambari.apache.org:8050</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>c6501.ambari.apache.org:8141</value>
</property>
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>2</value>
</property>
<property>
<name>yarn.resourcemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.resourcemanager.connect.max-wait.ms</name>
<value>900000</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>30000</value>
</property>
<property>
<name>yarn.resourcemanager.fs.state-store.retry-policy-spec</name>
<value>2000, 500</value>
</property>
<property>
<name>yarn.resourcemanager.fs.state-store.uri</name>
<value> </value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>c6501.ambari.apache.org</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/etc/hadoop/conf/yarn.exclude</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>c6501.ambari.apache.org:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>c6501.ambari.apache.org:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name>yarn.resourcemanager.state-store.max-completed-applications</name>
<value>${yarn.resourcemanager.max-completed-applications}</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size</name>
<value>10</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>c6501.ambari.apache.org:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-acl</name>
<value>world:anyone:rwcda</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>localhost:2181</value>
</property>
<property>
<name>yarn.resourcemanager.zk-num-retries</name>
<value>1000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-retry-interval-ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-state-store.parent-path</name>
<value>/rmstore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-timeout-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>682</value>
</property>
<property>
<name>yarn.timeline-service.address</name>
<value>c6501.ambari.apache.org:10200</value>
</property>
<property>
<name>yarn.timeline-service.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.timeline-service.client.max-retries</name>
<value>30</value>
</property>
<property>
<name>yarn.timeline-service.client.retry-interval-ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.store-class</name>
<value>org.apache.hadoop.yarn.server.applicationhistoryservice.NullApplicationHistoryStore</value>
</property>
<property>
<name>yarn.timeline-service.http-authentication.simple.anonymous.allowed</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.http-authentication.type</name>
<value>simple</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.path</name>
<value>/hadoop/yarn/timeline</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.read-cache-size</name>
<value>104857600</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size</name>
<value>10000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size</name>
<value>10000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms</name>
<value>300000</value>
</property>
<property>
<name>yarn.timeline-service.store-class</name>
<value>org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore</value>
</property>
<property>
<name>yarn.timeline-service.ttl-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.ttl-ms</name>
<value>2678400000</value>
</property>
<property>
<name>yarn.timeline-service.webapp.address</name>
<value>c6501.ambari.apache.org:8188</value>
</property>
<property>
<name>yarn.timeline-service.webapp.https.address</name>
<value>c6501.ambari.apache.org:8190</value>
</property>
</configuration>
......@@ -23,8 +23,11 @@ import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.cli.CliDriver;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.service.HiveClient;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.repository.graph.GraphBackedMetadataRepository;
......@@ -40,8 +43,13 @@ import org.testng.annotations.Test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URL;
import java.util.List;
import java.util.Properties;
@Test (enabled = false)
public class HiveGraphRepositoryTest {
......@@ -49,6 +57,8 @@ public class HiveGraphRepositoryTest {
protected HiveTypeSystem hts;
private GraphBackedMetadataRepository repository;
private GraphService gs;
public static final String HIVE_L4J_PROPS = "target/hive-log4j.properties";
public static final String HIVE_EXEC_L4J_PROPS = "target/hive-exec-log4j.properties";
private static final Logger LOG =
LoggerFactory.getLogger(HiveGraphRepositoryTest.class);
......@@ -79,8 +89,10 @@ public class HiveGraphRepositoryTest {
@Test (enabled = false)
public void testHiveImport() throws Exception {
HiveImporter hImporter = new HiveImporter(repository, hts, new HiveMetaStoreClient(new HiveConf()));
HiveConf conf = new HiveConf();
HiveMetaStoreClient hiveMetaStoreClient;
hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(repository, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
......@@ -136,4 +148,5 @@ public class HiveGraphRepositoryTest {
bw.flush();
bw.close();
}
}
......@@ -21,11 +21,15 @@ package org.apache.hadoop.metadata.hivetypes;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.plan.api.QueryPlan;
import org.apache.hadoop.hive.service.HiveClient;
import org.apache.hadoop.metadata.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.MetadataException;
import org.apache.hadoop.metadata.storage.Id;
import org.apache.hadoop.metadata.storage.memory.MemRepository;
import org.apache.hadoop.metadata.types.TypeSystem;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeClass;
......@@ -35,13 +39,17 @@ import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
@Test (enabled = false)
@Test (enabled = true)
public class HiveTypeSystemTest {
protected MemRepository mr;
protected HiveTypeSystem hts;
private static final String hiveHost = "c6501.ambari.apache.org";
private static final short hivePort = 10000;
private static final Logger LOG =
LoggerFactory.getLogger(HiveTypeSystemTest.class);
......@@ -54,11 +62,14 @@ public class HiveTypeSystemTest {
hts = HiveTypeSystem.getInstance();
}
@Test (enabled = false)
@Test (enabled = true)
public void testHiveImport() throws MetaException, MetadataException, IOException {
HiveImporter hImporter = new HiveImporter(mr, hts, new HiveMetaStoreClient(new HiveConf()));
HiveConf conf = new HiveConf();
HiveMetaStoreClient hiveMetaStoreClient;
hiveMetaStoreClient = new HiveMetaStoreClient(conf);
HiveImporter hImporter = new HiveImporter(mr, hts, hiveMetaStoreClient);
hImporter.importHiveMetadata();
LOG.info("Defined DB instances");
File f = new File("./target/logs/hiveobjs.txt");
f.getParentFile().mkdirs();
......@@ -95,4 +106,20 @@ public class HiveTypeSystemTest {
bw.flush();
bw.close();
}
@Test (enabled = true)
public void testHiveLineage() throws MetaException, MetadataException, IOException, Exception {
Class.forName("org.apache.hive.jdbc.HiveDriver");
String url = "jdbc:hive2://" + hiveHost + ":" + hivePort;
Connection con = DriverManager.getConnection(url, "ambari-qa", "");
Statement stmt = con.createStatement();
stmt.execute("drop table if exists t");
stmt.execute("create table t(a int, b string)");
stmt.execute("drop table if exists t2");
stmt.execute("create table t2 as select * from t");
}
}
\ No newline at end of file
drop table if exists t;
create table t(a int, b string);
drop table if exists t2;
create table t2 as select * from t;
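The script above mirrors the statements that testHiveLineage issues over JDBC; the CTAS at the end is presumably the statement of interest for lineage capture, since it reads t and writes t2. As a usage note, the same flow with try-with-resources, so the Statement and Connection are always closed even when a statement fails (host, port and user are the test values used above):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveLineageSmokeTest {
    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String url = "jdbc:hive2://c6501.ambari.apache.org:10000";
        // try-with-resources closes the Statement and Connection in all cases.
        try (Connection con = DriverManager.getConnection(url, "ambari-qa", "");
             Statement stmt = con.createStatement()) {
            stmt.execute("drop table if exists t");
            stmt.execute("create table t(a int, b string)");
            stmt.execute("drop table if exists t2");
            stmt.execute("create table t2 as select * from t"); // CTAS: reads t, writes t2
        }
    }
}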