Commit 3e695bad by Shwetha GS

handling all hive query operations in hive hook

parent 1abeba45
......@@ -40,25 +40,6 @@
<dependencies>
<dependency>
    <groupId>org.apache.hadoop.metadata</groupId>
    <artifactId>metadata-client</artifactId>
    <!-- ${project.version} is the supported spelling; the bare ${version} expression is deprecated -->
    <version>${project.version}</version>
    <exclusions>
        <exclusion>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
        </exclusion>
    </exclusions>
    <scope>runtime</scope>
    <type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minikdc</artifactId>
<version>${hadoop.version}</version>
......@@ -99,6 +80,25 @@
</dependency>
<dependency>
    <groupId>org.apache.hadoop.metadata</groupId>
    <artifactId>metadata-client</artifactId>
    <!-- ${project.version} is the supported spelling; the bare ${version} expression is deprecated -->
    <version>${project.version}</version>
    <exclusions>
        <exclusion>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
        </exclusion>
    </exclusions>
    <scope>runtime</scope>
    <type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop.metadata</groupId>
<artifactId>metadata-typesystem</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
......
......@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.MetadataServiceException;
import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.Referenceable;
......@@ -91,31 +92,6 @@ public class HiveMetaStoreBridge {
}
}
/**
* Gets the reference for a database that is already registered.
*
* Runs a DSL search of the form
* "<hive db type> where name = '<databaseName>' and clusterName = '<clusterName>'"
* and wraps the guid of the first result in a Referenceable.
*
* @param databaseName name of the database to look up
* @param clusterName cluster name the database belongs to
* @return Reference for database if exists, else null
* @throws Exception if the search or the parsing of its response fails
*/
private Referenceable getDatabaseReference(String databaseName, String clusterName) throws Exception {
LOG.debug("Getting reference for database {}", databaseName);
String typeName = HiveDataTypes.HIVE_DB.getName();
MetadataServiceClient dgiClient = getMetadataServiceClient();
String dslQuery = String.format("%s where name = '%s' and clusterName = '%s'",
HiveDataTypes.HIVE_DB.getName(), databaseName, clusterName);
JSONArray results = dgiClient.searchByDSL(dslQuery);
// No match means the database has not been registered yet.
if (results.length() == 0) {
return null;
} else {
// Only the first result is used.
String guid = getGuidFromDSLResponse(results.getJSONObject(0));
return new Referenceable(guid, typeName, null);
}
}
public Referenceable registerDatabase(String databaseName) throws Exception {
Referenceable dbRef = getDatabaseReference(databaseName, clusterName);
if (dbRef == null) {
......@@ -169,6 +145,35 @@ public class HiveMetaStoreBridge {
}
/**
 * Looks up the reference of an already registered Hive database.
 *
 * @param databaseName name of the database to look up
 * @param clusterName  cluster name the database belongs to
 * @return reference to the database entity, or null when the search returns no result
 * @throws Exception if the search or the parsing of its response fails
 */
private Referenceable getDatabaseReference(String databaseName, String clusterName) throws Exception {
    LOG.debug("Getting reference for database {}", databaseName);

    final String dbTypeName = HiveDataTypes.HIVE_DB.getName();
    // The type name doubles as the leading clause of the DSL query.
    final String lookupQuery =
            String.format("%s where name = '%s' and clusterName = '%s'", dbTypeName, databaseName, clusterName);

    return getEntityReferenceFromDSL(dbTypeName, lookupQuery);
}
/**
 * Runs a DSL search and turns its first result into a reference of the given type.
 *
 * @param typeName type name to stamp on the returned reference
 * @param dslQuery DSL query to execute
 * @return reference built from the guid of the first result, or null when there are no results
 * @throws Exception if the search or the parsing of its response fails
 */
private Referenceable getEntityReferenceFromDSL(String typeName, String dslQuery) throws Exception {
    JSONArray matches = getMetadataServiceClient().searchByDSL(dslQuery);
    if (matches.length() > 0) {
        // Only the first match is considered.
        String guid = getGuidFromDSLResponse(matches.getJSONObject(0));
        return new Referenceable(guid, typeName, null);
    }
    return null;
}
/**
* Gets reference for the table
*
* @param dbName
......@@ -180,19 +185,47 @@ public class HiveMetaStoreBridge {
LOG.debug("Getting reference for table {}.{}", dbName, tableName);
String typeName = HiveDataTypes.HIVE_TABLE.getName();
MetadataServiceClient dgiClient = getMetadataServiceClient();
String query = String.format("%s where name = '%s', dbName where name = '%s' and clusterName = '%s'",
HiveDataTypes.HIVE_TABLE.getName(), tableName, dbName, clusterName);
JSONArray results = dgiClient.searchByDSL(query);
// String dslQuery = String.format("%s as t where name = '%s' dbName where name = '%s' and "
// + "clusterName = '%s' select t",
// HiveDataTypes.HIVE_TABLE.getName(), tableName, dbName, clusterName);
String dbType = HiveDataTypes.HIVE_DB.getName();
String gremlinQuery = String.format("g.V.has('__typeName', '%s').has('%s.name', '%s').as('t').out"
+ "('__%s.dbName').has('%s.name', '%s').has('%s.clusterName', '%s').back('t').toList()",
typeName, typeName, tableName, typeName, dbType, dbName, dbType, clusterName);
return getEntityReferenceFromGremlin(typeName, gremlinQuery);
}
/**
 * Runs a Gremlin query through the metadata service client and builds a reference of the
 * given type from the first entry of the response's RESULTS array.
 *
 * @param typeName type name to stamp on the returned reference
 * @param gremlinQuery Gremlin query to execute
 * @return reference for the first result, or null when the result array is empty
 * @throws MetadataServiceException declared for the metadata service client call
 * @throws JSONException if the response does not have the expected JSON structure
 *
 * NOTE(review): the else branch below refers to dbName/tableName, which are neither
 * parameters nor locals of this method, and it is followed by a second guid extraction
 * that reads "__guid" directly. This looks like two variants of the same logic shown
 * together - confirm which lines are current.
 */
private Referenceable getEntityReferenceFromGremlin(String typeName, String gremlinQuery) throws MetadataServiceException,
JSONException {
MetadataServiceClient client = getMetadataServiceClient();
JSONObject response = client.searchByGremlin(gremlinQuery);
JSONArray results = response.getJSONArray(MetadataServiceClient.RESULTS);
if (results.length() == 0) {
return null;
} else {
//There should be just one instance with the given name
String guid = getGuidFromDSLResponse(results.getJSONObject(0));
LOG.debug("Got reference for table {}.{} = {}", dbName, tableName, guid);
return new Referenceable(guid, typeName, null);
}
// Variant that reads the guid straight from the "__guid" key of the first result.
String guid = results.getJSONObject(0).getString("__guid");
return new Referenceable(guid, typeName, null);
}
/**
 * Looks up the reference of an already registered partition.
 *
 * The lookup is a Gremlin traversal that starts at partition vertices with the given
 * values, follows the tableName edge to a table with the given name, then the dbName edge
 * to a database with the given name and this bridge's cluster name, and finally steps
 * back to the partition.
 *
 * @param dbName    name of the database owning the table
 * @param tableName name of the table owning the partition
 * @param values    partition values
 * @return whatever getEntityReferenceFromGremlin yields for the generated query
 * @throws Exception if the lookup fails
 */
private Referenceable getPartitionReference(String dbName, String tableName, List<String> values) throws Exception {
    // Render the values as a quoted list literal: ['v1', 'v2', ...]
    final String quotedValues = "['" + StringUtils.join(values, "', '") + "']";
    LOG.debug("Getting reference for partition for {}.{} with values {}", dbName, tableName, quotedValues);

    final String partitionType = HiveDataTypes.HIVE_PARTITION.getName();
    final String databaseType = HiveDataTypes.HIVE_DB.getName();
    final String tableTypeName = HiveDataTypes.HIVE_TABLE.getName();

    final String template = "g.V.has('__typeName', '%s').has('%s.values', %s).as('p')."
            + "out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
            + ".has('%s.clusterName', '%s').back('p').toList()";
    final String traversal = String.format(template,
            partitionType, partitionType, quotedValues, partitionType,
            tableTypeName, tableName, tableTypeName,
            databaseType, dbName, databaseType, clusterName);

    return getEntityReferenceFromGremlin(partitionType, traversal);
}
private String getGuidFromDSLResponse(JSONObject jsonObject) throws JSONException {
......@@ -292,31 +325,48 @@ public class HiveMetaStoreBridge {
}
}
/**
 * Registers a partition after making sure its database and table are registered.
 *
 * TODO: should be idempotent.
 *
 * @param partition Hive partition to register
 * @return reference returned by importPartition for the partition
 * @throws Exception if any of the registration steps fails
 */
public Referenceable registerPartition(Partition partition) throws Exception {
    final Table owningTable = partition.getTable();
    final String databaseName = owningTable.getDbName();
    final String owningTableName = owningTable.getTableName();

    // Database first, then table, then the table's storage descriptor.
    final Referenceable databaseRef = registerDatabase(databaseName);
    final Referenceable owningTableRef = registerTable(databaseName, owningTableName);
    final Referenceable storageDescRef = getSDForTable(databaseName, owningTableName);

    return importPartition(partition, databaseRef, owningTableRef, storageDescRef);
}
/**
 * Imports a single Hive partition.
 *
 * The lookup-based flow asks getPartitionReference for an existing partition with the same
 * database name, table name and values. Only when none is found does it build a new
 * HIVE_PARTITION Referenceable (values, createTime, lastAccessTime, dbName, tableName, sd,
 * parameters) and pass it to createInstance; otherwise it logs the existing id.
 *
 * NOTE(review): this span also contains an unconditional "new Referenceable(...)" /
 * "return createInstance(partRef)" sequence, and partRef is declared twice. It looks like
 * two variants of the method shown together - confirm which lines are current.
 *
 * @param hivePart Hive partition to import
 * @param dbReferenceable reference stored in the partition's "dbName" attribute
 * @param tableReferenceable reference stored in the partition's "tableName" attribute
 * @param sdReferenceable reference stored in the partition's "sd" attribute
 * @return the partition reference (existing or newly created)
 * @throws Exception if the lookup or the creation fails
 */
private Referenceable importPartition(Partition hivePart,
Referenceable dbReferenceable,
Referenceable tableReferenceable,
Referenceable sdReferenceable) throws Exception {
LOG.info("Importing partition for {}.{} with values {}", dbReferenceable, tableReferenceable,
StringUtils.join(hivePart.getValues(), ","));
Referenceable partRef = new Referenceable(HiveDataTypes.HIVE_PARTITION.getName());
partRef.set("values", hivePart.getValues());
String dbName = hivePart.getTable().getDbName();
String tableName = hivePart.getTable().getTableName();
partRef.set("dbName", dbReferenceable);
partRef.set("tableName", tableReferenceable);
// Reuse an already registered partition when one exists.
Referenceable partRef = getPartitionReference(dbName, tableName, hivePart.getValues());
if (partRef == null) {
partRef = new Referenceable(HiveDataTypes.HIVE_PARTITION.getName());
partRef.set("values", hivePart.getValues());
//todo fix
// createTime is populated from the last access time here.
partRef.set("createTime", hivePart.getLastAccessTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
partRef.set("dbName", dbReferenceable);
partRef.set("tableName", tableReferenceable);
// sdStruct = fillStorageDescStruct(hivePart.getSd());
// Instead of creating copies of the sdstruct for partitions we are reusing existing
// ones will fix to identify partitions with differing schema.
partRef.set("sd", sdReferenceable);
//todo fix
partRef.set("createTime", hivePart.getLastAccessTime());
partRef.set("lastAccessTime", hivePart.getLastAccessTime());
partRef.set("parameters", hivePart.getParameters());
// sdStruct = fillStorageDescStruct(hivePart.getSd());
// Instead of creating copies of the sdstruct for partitions we are reusing existing
// ones will fix to identify partitions with differing schema.
partRef.set("sd", sdReferenceable);
return createInstance(partRef);
partRef.set("parameters", hivePart.getParameters());
partRef = createInstance(partRef);
} else {
LOG.info("Partition {}.{} with values {} is already registered with id {}", dbName, tableName,
StringUtils.join(hivePart.getValues(), ","), partRef.getId().id);
}
return partRef;
}
private void importIndexes(String db, String table,
......
......@@ -189,37 +189,48 @@ public class HiveHook implements ExecuteWithHookContext {
switch (event.operation) {
case CREATEDATABASE:
Set<WriteEntity> outputs = event.outputs;
for (WriteEntity entity : outputs) {
if (entity.getType() == Entity.Type.DATABASE) {
dgiBridge.registerDatabase(entity.getDatabase().getName());
}
}
handleCreateDB(dgiBridge, event);
break;
case CREATETABLE:
outputs = event.outputs;
for (WriteEntity entity : outputs) {
if (entity.getType() == Entity.Type.TABLE) {
Table table = entity.getTable();
//TODO table.getDbName().toLowerCase() is required as hive stores in lowercase,
// but table.getDbName() is not lowercase
Referenceable dbReferenceable = dgiBridge.registerDatabase(table.getDbName().toLowerCase());
dgiBridge.registerTable(dbReferenceable, table.getDbName(), table.getTableName());
}
}
handleCreateTable(dgiBridge, event);
break;
case CREATETABLE_AS_SELECT:
registerCTAS(dgiBridge, event);
case CREATEVIEW:
case LOAD:
case EXPORT:
case IMPORT:
case QUERY:
registerProcess(dgiBridge, event);
break;
default:
}
}
private void registerCTAS(HiveMetaStoreBridge dgiBridge, HiveEvent event) throws Exception {
/**
 * Handles a CREATE TABLE event: registers the database of every written table and then
 * the table itself.
 *
 * The same lower-cased database name is used for both registrations, so the table is
 * registered against exactly the database name that was registered.
 *
 * @param dgiBridge bridge used to register the entities
 * @param event     hive event whose outputs are inspected
 * @throws Exception if a registration fails
 */
private void handleCreateTable(HiveMetaStoreBridge dgiBridge, HiveEvent event) throws Exception {
    for (WriteEntity entity : event.outputs) {
        if (entity.getType() != Entity.Type.TABLE) {
            continue;
        }

        Table table = entity.getTable();
        //TODO table.getDbName().toLowerCase() is required as hive stores in lowercase,
        // but table.getDbName() is not lowercase
        String dbName = table.getDbName().toLowerCase();
        Referenceable dbReferenceable = dgiBridge.registerDatabase(dbName);
        dgiBridge.registerTable(dbReferenceable, dbName, table.getTableName());
    }
}
/**
 * Handles a CREATE DATABASE event by registering every database among the event outputs.
 *
 * @param dgiBridge bridge used to register the databases
 * @param event     hive event whose outputs are inspected
 * @throws Exception if a registration fails
 */
private void handleCreateDB(HiveMetaStoreBridge dgiBridge, HiveEvent event) throws Exception {
    for (WriteEntity output : event.outputs) {
        // Outputs of any other type are ignored.
        if (output.getType() != Entity.Type.DATABASE) {
            continue;
        }
        dgiBridge.registerDatabase(output.getDatabase().getName());
    }
}
private void registerProcess(HiveMetaStoreBridge dgiBridge, HiveEvent event) throws Exception {
Set<ReadEntity> inputs = event.inputs;
Set<WriteEntity> outputs = event.outputs;
......@@ -243,7 +254,7 @@ public class HiveHook implements ExecuteWithHookContext {
processReferenceable.set("userName", event.user);
List<Referenceable> source = new ArrayList<>();
for (ReadEntity readEntity : inputs) {
if (readEntity.getTyp() == Entity.Type.TABLE) {
if (readEntity.getType() == Entity.Type.TABLE) {
Table table = readEntity.getTable();
String dbName = table.getDbName().toLowerCase();
source.add(dgiBridge.registerTable(dbName, table.getTableName()));
......@@ -252,11 +263,14 @@ public class HiveHook implements ExecuteWithHookContext {
processReferenceable.set("inputTables", source);
List<Referenceable> target = new ArrayList<>();
for (WriteEntity writeEntity : outputs) {
if (writeEntity.getTyp() == Entity.Type.TABLE) {
if (writeEntity.getType() == Entity.Type.TABLE || writeEntity.getType() == Entity.Type.PARTITION) {
Table table = writeEntity.getTable();
String dbName = table.getDbName().toLowerCase();
target.add(dgiBridge.registerTable(dbName, table.getTableName()));
}
if (writeEntity.getType() == Entity.Type.PARTITION) {
dgiBridge.registerPartition(writeEntity.getPartition());
}
}
processReferenceable.set("outputTables", target);
processReferenceable.set("queryText", queryStr);
......
......@@ -26,10 +26,13 @@ import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
public class HiveHookIT {
private static final String DGI_URL = "http://localhost:21000/";
private static final String CLUSTER_NAME = "test";
......@@ -59,6 +62,9 @@ public class HiveHookIT {
hiveConf.set("javax.jdo.option.ConnectionURL", "jdbc:derby:./target/metastore_db;create=true");
hiveConf.set("hive.hook.dgi.synchronous", "true");
hiveConf.set(HiveMetaStoreBridge.HIVE_CLUSTER_NAME, CLUSTER_NAME);
//weird, hive prepends test_ to table name
hiveConf.set("hive.test.mode", "true");
hiveConf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem");
return hiveConf;
}
......@@ -69,7 +75,7 @@ public class HiveHookIT {
@Test
public void testCreateDatabase() throws Exception {
String dbName = "db" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
String dbName = "db" + random();
runCommand("create database " + dbName);
assertDatabaseIsRegistered(dbName);
......@@ -77,15 +83,15 @@ public class HiveHookIT {
@Test
public void testCreateTable() throws Exception {
String dbName = "db" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
String dbName = "db" + random();
runCommand("create database " + dbName);
String tableName = "table" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
String tableName = "table" + random();
runCommand("create table " + dbName + "." + tableName + "(id int, name string)");
assertTableIsRegistered(dbName, tableName);
tableName = "table" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
runCommand("create table " + tableName + "(id int, name string)");
tableName = "table" + random();
runCommand("create table " + tableName + "(id int, name string) partitioned by(dt string)");
assertTableIsRegistered("default", tableName);
//Create table where database doesn't exist, will create database instance as well
......@@ -94,10 +100,10 @@ public class HiveHookIT {
@Test
public void testCTAS() throws Exception {
String tableName = "table" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
String tableName = "table" + random();
runCommand("create table " + tableName + "(id int, name string)");
String ctasTableName = "table" + RandomStringUtils.randomAlphanumeric(5).toLowerCase();
String ctasTableName = "table" + random();
String query = "create table " + ctasTableName + " as select * from " + tableName;
runCommand(query);
......@@ -105,24 +111,125 @@ public class HiveHookIT {
assertProcessIsRegistered(query);
}
/**
 * Creates a view over a fresh table and checks that both the view and the
 * creating query are registered.
 */
@Test
public void testCreateView() throws Exception {
    final String sourceTable = "table" + random();
    runCommand(String.format("create table %s(id int, name string)", sourceTable));

    final String view = "table" + random();
    final String createView = String.format("create view %s as select * from %s", view, sourceTable);
    runCommand(createView);

    assertTableIsRegistered("default", view);
    assertProcessIsRegistered(createView);
}
/**
 * Loads a local file into a table and checks that the load query is registered.
 * The table is created with an explicit "test_" prefix while the load statement
 * uses the bare name.
 */
@Test
public void testLoadData() throws Exception {
    final String target = "table" + random();
    runCommand(String.format("create table test_%s(id int, name string)", target));

    final String dataPath = file("load");
    final String loadQuery =
            String.format("load data local inpath 'file://%s' into table %s", dataPath, target);
    runCommand(loadQuery);

    assertProcessIsRegistered(loadQuery);
}
/**
 * Inserts from one partitioned table into a partition of another and checks that
 * the query and the written partition are registered.
 */
@Test
public void testInsert() throws Exception {
    final String sourceTable = "table" + random();
    runCommand(String.format("create table %s(id int, name string) partitioned by(dt string)", sourceTable));

    final String targetTable = "table" + random();
    runCommand(String.format("create table test_%s(name string) partitioned by(dt string)", targetTable));

    final String insertQuery = String.format(
            "insert into %s partition(dt = '2015-01-01') select name from %s where dt = '2015-01-01'",
            targetTable, sourceTable);
    runCommand(insertQuery);

    assertProcessIsRegistered(insertQuery);
    assertPartitionIsRegistered("default", "test_" + targetTable, "2015-01-01");
}
/**
 * Produces a short random suffix for test object names.
 *
 * @return five random alphanumeric characters, lower-cased
 */
private String random() {
    final String suffix = RandomStringUtils.randomAlphanumeric(5);
    return suffix.toLowerCase();
}
/**
 * Creates an empty data file under ./target whose name contains the tag and a random suffix.
 *
 * @param tag prefix used in the file name
 * @return absolute path of the file
 * @throws Exception if the file cannot be created
 */
private String file(String tag) throws Exception {
    final File dataFile = new File("./target/" + tag + "-data-" + random());
    dataFile.createNewFile();
    return dataFile.getAbsolutePath();
}
/**
 * Creates a directory under ./target whose name contains the tag and a random suffix.
 *
 * @param tag prefix used in the directory name
 * @return absolute path of the directory
 * @throws IllegalStateException if the directory does not exist after the attempt to create it
 * @throws Exception declared for callers; kept from the original signature
 */
private String mkdir(String tag) throws Exception {
    final File dir = new File("./target/" + tag + "-data-" + random());
    // mkdirs() reports failure through its return value only; surface it here instead of
    // letting a later hive command fail with a less obvious error.
    if (!dir.mkdirs() && !dir.isDirectory()) {
        throw new IllegalStateException("Could not create directory " + dir.getAbsolutePath());
    }
    return dir.getAbsolutePath();
}
/**
 * Exports a table to a pfile:// directory, imports it into a second table and
 * checks that both queries are registered.
 */
@Test
public void testExportImport() throws Exception {
    // Export side
    final String exportedTable = "table" + random();
    runCommand(String.format("create table test_%s(name string)", exportedTable));

    final String exportLocation = "pfile://" + mkdir("export");
    final String exportQuery = String.format("export table %s to '%s'", exportedTable, exportLocation);
    runCommand(exportQuery);
    assertProcessIsRegistered(exportQuery);

    // Import side, reading from the same location
    final String importedTable = "table" + random();
    runCommand(String.format("create table %s(name string)", importedTable));

    final String importQuery = String.format("import table %s from '%s'", importedTable, exportLocation);
    runCommand(importQuery);
    assertProcessIsRegistered(importQuery);
}
/**
 * Runs a plain select against a fresh table and checks that the query is registered.
 */
@Test
public void testSelect() throws Exception {
    final String source = "table" + random();
    runCommand(String.format("create table %s(id int, name string)", source));

    final String selectQuery = String.format("select * from %s", source);
    runCommand(selectQuery);

    assertProcessIsRegistered(selectQuery);
}
/**
 * Asserts that a HIVE_PROCESS entity whose queryText equals the given query can be found
 * through a DSL search.
 *
 * NOTE(review): dslQuery is declared twice below (single-quoted and double-quoted query
 * text) and two differently named assert helpers are called. This looks like two variants
 * shown together - confirm which lines are current.
 *
 * @param queryStr query text to look for
 */
private void assertProcessIsRegistered(String queryStr) throws Exception {
String dslQuery = String.format("%s where queryText = '%s'", HiveDataTypes.HIVE_PROCESS.getName(), queryStr);
assertInstanceIsRegistered(dslQuery);
String dslQuery = String.format("%s where queryText = \"%s\"", HiveDataTypes.HIVE_PROCESS.getName(), queryStr);
assertEntityIsRegistered(dslQuery);
}
/**
 * Asserts that a HIVE_TABLE entity with the given name, in a database with the given name
 * on CLUSTER_NAME, can be found through a DSL search.
 *
 * NOTE(review): both assertInstanceIsRegistered and assertEntityIsRegistered are called
 * below; this looks like two variants shown together - confirm which line is current.
 *
 * @param dbName name of the database owning the table
 * @param tableName name of the table to look for
 */
private void assertTableIsRegistered(String dbName, String tableName) throws Exception {
String query = String.format("%s where name = '%s', dbName where name = '%s' and clusterName = '%s'",
HiveDataTypes.HIVE_TABLE.getName(), tableName, dbName, CLUSTER_NAME);
assertInstanceIsRegistered(query);
assertEntityIsRegistered(query);
}
/**
 * Asserts that a HIVE_DB entity with the given name on CLUSTER_NAME can be found through
 * a DSL search.
 *
 * NOTE(review): both assertInstanceIsRegistered and assertEntityIsRegistered are called
 * below; this looks like two variants shown together - confirm which line is current.
 *
 * @param dbName name of the database to look for
 */
private void assertDatabaseIsRegistered(String dbName) throws Exception {
String query = String.format("%s where name = '%s' and clusterName = '%s'", HiveDataTypes.HIVE_DB.getName(),
dbName, CLUSTER_NAME);
assertInstanceIsRegistered(query);
assertEntityIsRegistered(query);
}
/**
 * Asserts that exactly one partition with the given single value is registered for the
 * given table and database on CLUSTER_NAME.
 *
 * The check is a Gremlin traversal from the partition vertex through its tableName and
 * dbName edges, stepping back to the partition at the end.
 *
 * @param dbName    name of the database owning the table
 * @param tableName name of the table owning the partition
 * @param value     the partition value
 */
private void assertPartitionIsRegistered(String dbName, String tableName, String value) throws Exception {
    final String partitionType = HiveDataTypes.HIVE_PARTITION.getName();
    final String databaseType = HiveDataTypes.HIVE_DB.getName();
    final String tableTypeName = HiveDataTypes.HIVE_TABLE.getName();

    final String template = "g.V.has('__typeName', '%s').has('%s.values', ['%s']).as('p')."
            + "out('__%s.tableName').has('%s.name', '%s').out('__%s.dbName').has('%s.name', '%s')"
            + ".has('%s.clusterName', '%s').back('p').toList()";
    final String traversal = String.format(template,
            partitionType, partitionType, value, partitionType,
            tableTypeName, tableName, tableTypeName,
            databaseType, dbName, databaseType, CLUSTER_NAME);

    final JSONArray matches = dgiCLient.searchByGremlin(traversal).getJSONArray(MetadataServiceClient.RESULTS);
    Assert.assertEquals(matches.length(), 1);
}
/**
 * Runs the DSL query and asserts that it returns exactly one result.
 *
 * NOTE(review): two method headers (assertInstanceIsRegistered and
 * assertEntityIsRegistered) precede a single body; this looks like a rename shown with
 * both names - confirm which header is current.
 *
 * @param dslQuery DSL query expected to match a single entity
 */
private void assertInstanceIsRegistered(String dslQuery) throws Exception{
private void assertEntityIsRegistered(String dslQuery) throws Exception{
JSONArray results = dgiCLient.searchByDSL(dslQuery);
Assert.assertEquals(results.length(), 1);
}
......
......@@ -200,7 +200,7 @@ public class MetadataServiceClient {
public Referenceable getEntity(String guid) throws MetadataServiceException {
JSONObject jsonResponse = callAPI(API.GET_ENTITY, null, guid);
try {
String entityInstanceDefinition = jsonResponse.getString(MetadataServiceClient.GUID);
String entityInstanceDefinition = jsonResponse.getString(MetadataServiceClient.DEFINITION);
return InstanceSerialization.fromJsonReferenceable(entityInstanceDefinition, true);
} catch (JSONException e) {
throw new MetadataServiceException(e);
......
......@@ -101,6 +101,9 @@
<StagingId>apache-staging</StagingId>
<StagingName>Apache Release Distribution Repository</StagingName>
<StagingUrl>https://repository.apache.org/content/groups/staging</StagingUrl>
<!-- set to true to skip the checkstyle and findbugs checks -->
<skipCheck>false</skipCheck>
</properties>
<profiles>
......@@ -971,6 +974,7 @@
</goals>
<phase>verify</phase>
<configuration>
<skip>${skipCheck}</skip>
<consoleOutput>true</consoleOutput>
<includeTestSourceDirectory>true</includeTestSourceDirectory>
<configLocation>src/build/checkstyle.xml</configLocation>
......@@ -988,6 +992,7 @@
<!--debug>true</debug -->
<xmlOutput>true</xmlOutput>
<failOnError>false</failOnError>
<skip>${skipCheck}</skip>
</configuration>
<executions>
<execution>
......
......@@ -23,6 +23,8 @@ import com.thinkaurelius.titan.core.TitanIndexQuery;
import com.thinkaurelius.titan.core.TitanProperty;
import com.thinkaurelius.titan.core.TitanVertex;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.gremlin.groovy.Gremlin;
import com.tinkerpop.gremlin.java.GremlinPipeline;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.discovery.DiscoveryException;
import org.apache.hadoop.metadata.discovery.DiscoveryService;
......
......@@ -199,7 +199,7 @@ public class GraphBackedSearchIndexer implements SearchIndexer {
break;
case ENUM:
createVertexMixedIndex(propertyName, Integer.class);
createVertexMixedIndex(propertyName, String.class);
break;
case ARRAY:
......
......@@ -85,7 +85,7 @@
</logger>
<root>
<priority value="debug"/>
<priority value="info"/>
<appender-ref ref="console"/>
</root>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment