Commit 91cfa6a6 by Shwetha GS

Merge branch 'master' into dal

parents dd9a76b0 01ee72a3
......@@ -90,15 +90,9 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
......
......@@ -59,9 +59,11 @@ for i in "${BASEDIR}/bridge/hive/"*.jar; do
METADATACPPATH="${METADATACPPATH}:$i"
done
echo $METADATACPPATH
# log dir for applications
METADATA_LOG_DIR="${METADATA_LOG_DIR:-$BASEDIR/logs}"
export METADATA_LOG_DIR
JAVA_PROPERTIES="$METADATA_OPTS"
JAVA_PROPERTIES="$METADATA_OPTS -Dmetadata.log.dir=$METADATA_LOG_DIR -Dmetadata.log.file=import-hive.log"
shift
while [[ ${1} =~ ^\-D ]]; do
......@@ -70,6 +72,7 @@ while [[ ${1} =~ ^\-D ]]; do
done
TIME=`date +%Y%m%d%H%M%s`
#Add hive conf in classpath
if [ ! -z "$HIVE_CONF_DIR" ]; then
HIVE_CP=$HIVE_CONF_DIR
elif [ ! -z "$HIVE_HOME" ]; then
......@@ -86,5 +89,5 @@ echo Using Hive configuration directory [$HIVE_CP]
${JAVA_BIN} ${JAVA_PROPERTIES} -cp ${HIVE_CP}:${METADATACPPATH} org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge
RETVAL=$?
[ $RETVAL -eq 0 ] && echo Hive Data Model Imported!!!
[ $RETVAL -ne 0 ] && echo Failure in Hive Data Model import!!!
[ $RETVAL -eq 0 ] && echo Hive Data Model imported successfully!!!
[ $RETVAL -ne 0 ] && echo Failed to import Hive Data Model!!!
......@@ -155,7 +155,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
// clone to avoid concurrent access
final HiveConf conf = new HiveConf(hookContext.getConf());
boolean debug = conf.get("debug", "false").equals("true");
boolean debug = conf.get("hive.hook.dgi.synchronous", "false").equals("true");
if (debug) {
fireAndForget(hookContext, conf);
......@@ -178,8 +178,9 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
private void fireAndForget(HookContext hookContext, HiveConf conf) throws Exception {
assert hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK : "Non-POST_EXEC_HOOK not supported!";
LOG.info("Entered DGI hook for hook type {} operation {}", hookContext.getHookType(),
hookContext.getOperationName());
HiveOperation operation = HiveOperation.valueOf(hookContext.getOperationName());
LOG.info("Entered DGI hook for hook type {} operation {}", hookContext.getHookType(), operation);
HiveMetaStoreBridge dgiBridge = new HiveMetaStoreBridge(conf);
......@@ -206,7 +207,7 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
Table table = entity.getTable();
//TODO table.getDbName().toLowerCase() is required as hive stores in lowercase,
// but table.getDbName() is not lowercase
Referenceable dbReferenceable = getDatabaseReference(dgiBridge, table.getDbName().toLowerCase());
Referenceable dbReferenceable = dgiBridge.registerDatabase(table.getDbName().toLowerCase());
dgiBridge.registerTable(dbReferenceable, table.getDbName(), table.getTableName());
}
}
......@@ -229,7 +230,8 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
LOG.info("Explain statement. Skipping...");
}
String user = hookContext.getUserName();
//todo hookContext.getUserName() is null in hdp sandbox 2.2.4
String user = hookContext.getUserName() == null ? System.getProperty("user.name") : hookContext.getUserName();
HiveOperation operation = HiveOperation.valueOf(hookContext.getOperationName());
String queryId = null;
String queryStr = null;
......@@ -252,19 +254,19 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
if (readEntity.getTyp() == Entity.Type.TABLE) {
Table table = readEntity.getTable();
String dbName = table.getDbName().toLowerCase();
source.add(getTableReference(dgiBridge, dbName, table.getTableName()));
source.add(dgiBridge.registerTable(dbName, table.getTableName()));
}
}
processReferenceable.set("sourceTableNames", source);
processReferenceable.set("inputTables", source);
List<Referenceable> target = new ArrayList<>();
for (WriteEntity writeEntity : outputs) {
if (writeEntity.getTyp() == Entity.Type.TABLE) {
Table table = writeEntity.getTable();
String dbName = table.getDbName().toLowerCase();
target.add(getTableReference(dgiBridge, dbName, table.getTableName()));
target.add(dgiBridge.registerTable(dbName, table.getTableName()));
}
}
processReferenceable.set("targetTableNames", target);
processReferenceable.set("outputTables", target);
processReferenceable.set("queryText", queryStr);
processReferenceable.set("queryId", queryId);
processReferenceable.set("queryPlan", getQueryPlan(hookContext, conf));
......@@ -275,58 +277,6 @@ public class HiveHook implements ExecuteWithHookContext, HiveSemanticAnalyzerHoo
dgiBridge.createInstance(processReferenceable);
}
/**
* Gets reference for the database. Creates new instance if it doesn't exist
*
* @param dgiBridge
* @param dbName database name
* @return Reference for database
* @throws Exception
*/
private Referenceable getDatabaseReference(HiveMetaStoreBridge dgiBridge, String dbName) throws Exception {
String typeName = HiveDataTypes.HIVE_DB.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.rawSearch(typeName, "name", dbName);
JSONArray results = (JSONArray) result.get("results");
if (results.length() == 0) {
//Create new instance
return dgiBridge.registerDatabase(dbName);
} else {
String guid = (String) ((JSONObject) results.get(0)).get("guid");
return new Referenceable(guid, typeName, null);
}
}
/**
* Gets reference for the table. Creates new instance if it doesn't exist
*
* @param dgiBridge
* @param dbName
* @param tableName table name
* @return table reference
* @throws Exception
*/
private Referenceable getTableReference(HiveMetaStoreBridge dgiBridge, String dbName, String tableName) throws Exception {
String typeName = HiveDataTypes.HIVE_TABLE.getName();
MetadataServiceClient dgiClient = dgiBridge.getMetadataServiceClient();
JSONObject result = dgiClient.rawSearch(typeName, "tableName", tableName);
JSONArray results = (JSONArray) result.get("results");
if (results.length() == 0) {
Referenceable dbRererence = getDatabaseReference(dgiBridge, dbName);
return dgiBridge.registerTable(dbRererence, dbName, tableName).first;
} else {
//There should be just one instance with the given name
String guid = (String) ((JSONObject) results.get(0)).get("guid");
return new Referenceable(guid, typeName, null);
}
}
private String getQueryPlan(HookContext hookContext, HiveConf conf) throws Exception {
//We need to somehow get the sem associated with the plan and use it here.
......
......@@ -340,8 +340,8 @@ public class HiveDataModelGenerator {
private void createPartitionClass() throws MetadataException {
AttributeDefinition[] attributeDefinitions = new AttributeDefinition[]{
new AttributeDefinition("values", DataTypes.STRING_TYPE.getName(),
Multiplicity.COLLECTION, false, null),
new AttributeDefinition("values", DataTypes.arrayTypeName(DataTypes.STRING_TYPE.getName()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("dbName", HiveDataTypes.HIVE_DB.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("tableName", HiveDataTypes.HIVE_TABLE.getName(),
......@@ -354,10 +354,9 @@ public class HiveDataModelGenerator {
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("columns",
DataTypes.arrayTypeName(HiveDataTypes.HIVE_COLUMN.getName()),
Multiplicity.COLLECTION, true, null),
Multiplicity.OPTIONAL, true, null),
new AttributeDefinition("parameters", STRING_MAP_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
};
HierarchicalTypeDefinition<ClassType> definition =
new HierarchicalTypeDefinition<>(ClassType.class,
......
......@@ -21,8 +21,8 @@ Hive metadata can be modelled in DGI using its Type System. The default modellin
---++ Importing Hive Metadata
org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge imports the hive metadata into DGI using the typesystem defined in org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator. import-hive.sh command can be used to facilitate this.
Set-up the following configs in <dgi package>/conf/hive-site.xml:
* Hive metastore configuration - Refer [[https://cwiki.apache.org/confluence/display/Hive/AdminManual+MetastoreAdmin][Hive Metastore Configuration documentation]]
Set-up the following configs in hive-site.xml of your hive set-up and set environment variable HIVE_CONFIG to the
hive conf directory:
* DGI endpoint - Add the following property with the DGI endpoint for your set-up
<verbatim>
<property>
......@@ -57,4 +57,5 @@ The following properties in hive-site.xml control the thread pool details:
* hive.hook.dgi.minThreads - core number of threads. default 5
* hive.hook.dgi.maxThreads - maximum number of threads. default 5
* hive.hook.dgi.keepAliveTime - keep alive time in msecs. default 10
* hive.hook.dgi.synchronous - boolean, true to run the hook synchronously. default false
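
For reference, a minimal sketch of setting these hook properties on a HiveConf programmatically, in the same way the HiveHookIT change later in this commit does for its test setup; in a real deployment the properties go into hive-site.xml as described above. The endpoint URL and the numeric values below are illustrative placeholders, not part of this commit.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;

public class HiveHookConfSketch {
    public static HiveConf hookConf() {
        HiveConf hiveConf = new HiveConf();
        // DGI endpoint; the property key is the constant the bridge and the integration test use
        hiveConf.set(HiveMetaStoreBridge.DGI_URL_PROPERTY, "http://localhost:21000"); // placeholder URL
        // true runs the hook in the calling thread; default false (asynchronous thread pool)
        hiveConf.set("hive.hook.dgi.synchronous", "true");
        // thread pool tuning for the asynchronous path (defaults: 5 / 5 / 10 msecs)
        hiveConf.set("hive.hook.dgi.minThreads", "5");
        hiveConf.set("hive.hook.dgi.maxThreads", "5");
        hiveConf.set("hive.hook.dgi.keepAliveTime", "10");
        return hiveConf;
    }
}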
......@@ -58,7 +58,7 @@ public class HiveHookIT {
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, System.getProperty("user.dir") + "/target/metastore");
hiveConf.set(HiveMetaStoreBridge.DGI_URL_PROPERTY, DGI_URL);
hiveConf.set("javax.jdo.option.ConnectionURL", "jdbc:derby:./target/metastore_db;create=true");
hiveConf.set("debug", "true");
hiveConf.set("hive.hook.dgi.synchronous", "true");
return hiveConf;
}
......@@ -114,10 +114,7 @@ public class HiveHookIT {
}
private void assertInstanceIsRegistered(String typeName, String colName, String colValue) throws Exception{
JSONObject result = dgiCLient.rawSearch(typeName, colName, colValue);
JSONArray results = (JSONArray) result.get("results");
JSONArray results = dgiCLient.rawSearch(typeName, colName, colValue);
Assert.assertEquals(results.length(), 1);
JSONObject resultRow = (JSONObject) results.get(0);
Assert.assertEquals(resultRow.get(typeName + "." + colName), colValue);
}
}
......@@ -22,6 +22,10 @@ import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.api.client.config.DefaultClientConfig;
import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.apache.hadoop.metadata.typesystem.Referenceable;
import org.apache.hadoop.metadata.typesystem.json.InstanceSerialization;
import org.apache.hadoop.metadata.typesystem.json.Serialization;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
......@@ -149,8 +153,14 @@ public class MetadataServiceClient {
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject getEntity(String guid) throws MetadataServiceException {
return callAPI(API.GET_ENTITY, null, guid);
public ITypedReferenceableInstance getEntity(String guid) throws MetadataServiceException {
JSONObject jsonResponse = callAPI(API.GET_ENTITY, null, guid);
try {
String entityInstanceDefinition = jsonResponse.getString(MetadataServiceClient.RESULTS);
return Serialization.fromJson(entityInstanceDefinition);
} catch (JSONException e) {
throw new MetadataServiceException(e);
}
}
public JSONObject searchEntity(String searchQuery) throws MetadataServiceException {
......@@ -167,14 +177,14 @@ public class MetadataServiceClient {
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject rawSearch(String typeName, String attributeName,
Object attributeValue) throws MetadataServiceException {
String gremlinQuery = String.format(
"g.V.has(\"typeName\",\"%s\").and(_().has(\"%s.%s\", T.eq, \"%s\")).toList()",
typeName, typeName, attributeName, attributeValue);
return searchByGremlin(gremlinQuery);
// String dslQuery = String.format("%s where %s = \"%s\"", typeName, attributeName, attributeValue);
// return searchByDSL(dslQuery);
public JSONArray rawSearch(String typeName, String attributeName, Object attributeValue) throws
MetadataServiceException {
// String gremlinQuery = String.format(
// "g.V.has(\"typeName\",\"%s\").and(_().has(\"%s.%s\", T.eq, \"%s\")).toList()",
// typeName, typeName, attributeName, attributeValue);
// return searchByGremlin(gremlinQuery);
String dslQuery = String.format("%s where %s = \"%s\"", typeName, attributeName, attributeValue);
return searchByDSL(dslQuery);
}
/**
......@@ -183,10 +193,15 @@ public class MetadataServiceClient {
* @return result json object
* @throws MetadataServiceException
*/
public JSONObject searchByDSL(String query) throws MetadataServiceException {
public JSONArray searchByDSL(String query) throws MetadataServiceException {
WebResource resource = getResource(API.SEARCH_DSL);
resource = resource.queryParam("query", query);
return callAPIWithResource(API.SEARCH_DSL, resource);
JSONObject result = callAPIWithResource(API.SEARCH_DSL, resource);
try {
return result.getJSONObject("results").getJSONArray("rows");
} catch (JSONException e) {
throw new MetadataServiceException(e);
}
}
/**
......
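
Taken together, the client changes above mean callers now get a typed instance from getEntity and a plain rows array from rawSearch/searchByDSL. A minimal caller sketch follows, assuming the client's package and URL-taking constructor (neither is shown in this diff) and using illustrative type and attribute names; the typeName.attributeName row-key convention mirrors the HiveHookIT assertion earlier in this commit.

import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.typesystem.ITypedReferenceableInstance;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;

public class MetadataClientSketch {
    public static void main(String[] args) throws Exception {
        // Constructor taking the service base URL is assumed; it is not part of this diff.
        MetadataServiceClient dgiClient = new MetadataServiceClient("http://localhost:21000");

        // rawSearch now issues a DSL query of the form: <type> where <attribute> = "<value>"
        // and returns the "rows" array from the DSL response.
        JSONArray rows = dgiClient.rawSearch("hive_table", "tableName", "sales_fact");
        if (rows.length() > 0) {
            JSONObject row = (JSONObject) rows.get(0);
            // row keys follow the typeName.attributeName convention asserted in HiveHookIT
            System.out.println(row.get("hive_table.tableName"));
        }

        // getEntity now deserializes the "results" payload into a typed instance
        ITypedReferenceableInstance entity = dgiClient.getEntity("example-guid"); // placeholder guid
        System.out.println(entity);
    }
}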
......@@ -104,7 +104,6 @@ class GremlinEvaluator(qry: GremlinQuery, persistenceStrategy: GraphPersistenceS
val v = rV.getColumn(src).get(idx)
sInstance.set(cName, persistenceStrategy.constructInstance(aE.dataType, v))
}
sInstance
addPathStruct(r, sInstance)
}
GremlinQueryResult(qry.expr.toString, rType, rows.toList)
......@@ -137,4 +136,4 @@ object JsonHelper {
def toJson(r: GremlinQueryResult): String = {
writePretty(r)
}
}
\ No newline at end of file
}
......@@ -99,7 +99,7 @@ mkdir -p $METADATA_LOG_DIR
pushd ${BASEDIR} > /dev/null
JAVA_PROPERTIES="$METADATA_OPTS $METADATA_PROPERTIES -Dmetadata.log.dir=$METADATA_LOG_DIR -Dmetadata.home=${METADATA_HOME_DIR} -Dmetadata.conf=${METADATA_CONF}"
JAVA_PROPERTIES="$METADATA_OPTS $METADATA_PROPERTIES -Dmetadata.log.dir=$METADATA_LOG_DIR -Dmetadata.home=${METADATA_HOME_DIR} -Dmetadata.conf=${METADATA_CONF} -Dmetadata.log.file=application.log"
shift
while [[ ${1} =~ ^\-D ]]; do
......
......@@ -28,7 +28,7 @@
</appender>
<appender name="FILE" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="${metadata.log.dir}/application.log"/>
<param name="File" value="${metadata.log.dir}/${metadata.log.file}"/>
<param name="Append" value="true"/>
<param name="Threshold" value="debug"/>
<layout class="org.apache.log4j.PatternLayout">
......