Commit f592043c by Venkatesh Seetharam

Add Hive Lineage API

parent 1380771d
......@@ -352,8 +352,9 @@ public class HiveDataModelGenerator {
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("sd", HiveDataTypes.HIVE_STORAGEDESC.getName(),
Multiplicity.REQUIRED, false, null),
//new AttributeDefinition("columns", String.format("array<%s>", DefinedTypes
// .HIVE_COLUMN.getName()), Multiplicity.COLLECTION, true, null),
new AttributeDefinition("columns",
DataTypes.arrayTypeName(HiveDataTypes.HIVE_COLUMN.getName()),
Multiplicity.COLLECTION, true, null),
new AttributeDefinition("parameters", STRING_MAP_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
......@@ -382,11 +383,11 @@ public class HiveDataModelGenerator {
new AttributeDefinition("sd", HiveDataTypes.HIVE_STORAGEDESC.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("partitionKeys",
String.format("array<%s>", HiveDataTypes.HIVE_COLUMN.getName()),
DataTypes.arrayTypeName(HiveDataTypes.HIVE_COLUMN.getName()),
Multiplicity.OPTIONAL, false, null),
// new AttributeDefinition("columns", // todo - ask venkat
// String.format("array<%s>", HiveDataTypes.HIVE_COLUMN.getName()),
// Multiplicity.COLLECTION, true, null),
new AttributeDefinition("columns",
DataTypes.arrayTypeName(HiveDataTypes.HIVE_COLUMN.getName()),
Multiplicity.COLLECTION, true, null),
new AttributeDefinition("parameters", STRING_MAP_TYPE.getName(),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("viewOriginalText", DataTypes.STRING_TYPE.getName(),
......@@ -488,11 +489,11 @@ public class HiveDataModelGenerator {
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("userName", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
new AttributeDefinition("sourceTableNames",
String.format("array<%s>", HiveDataTypes.HIVE_TABLE.getName()),
new AttributeDefinition("inputTables",
DataTypes.arrayTypeName(HiveDataTypes.HIVE_TABLE.getName()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("targetTableNames",
String.format("array<%s>", HiveDataTypes.HIVE_TABLE.getName()),
new AttributeDefinition("outputTables",
DataTypes.arrayTypeName(HiveDataTypes.HIVE_TABLE.getName()),
Multiplicity.OPTIONAL, false, null),
new AttributeDefinition("queryText", DataTypes.STRING_TYPE.getName(),
Multiplicity.REQUIRED, false, null),
......
......@@ -26,8 +26,6 @@ import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.hive.bridge.HiveMetaStoreBridge;
import org.apache.hadoop.metadata.hive.model.HiveDataModelGenerator;
import org.apache.hadoop.metadata.hive.model.HiveDataTypes;
import org.apache.hadoop.metadata.typesystem.TypesDef;
import org.apache.hadoop.metadata.typesystem.json.TypesSerialization;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
......
......@@ -692,7 +692,6 @@
<filtering>false</filtering>
<includes>
<include>application.properties</include>
<include>graph.properties</include>
<include>log4j.xml</include>
</includes>
</resource>
......@@ -893,6 +892,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>2.7.2</version>
<configuration>
<!--<skipTests>true</skipTests>-->
<forkMode>always</forkMode>
</configuration>
<dependencies>
......
......@@ -22,6 +22,8 @@ import com.google.inject.Scopes;
import com.google.inject.throwingproviders.ThrowingProviderBinder;
import com.thinkaurelius.titan.core.TitanGraph;
import org.apache.hadoop.metadata.discovery.DiscoveryService;
import org.apache.hadoop.metadata.discovery.HiveLineageService;
import org.apache.hadoop.metadata.discovery.LineageService;
import org.apache.hadoop.metadata.discovery.SearchIndexer;
import org.apache.hadoop.metadata.discovery.graph.GraphBackedDiscoveryService;
import org.apache.hadoop.metadata.repository.MetadataRepository;
......@@ -48,6 +50,7 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
private Class<? extends MetadataService> metadataService;
private Class<? extends DiscoveryService> discoveryService;
private Class<? extends SearchIndexer> searchIndexer;
private Class<? extends LineageService> lineageService;
public RepositoryMetadataModule() {
// GraphServiceConfigurator gsp = new GraphServiceConfigurator();
......@@ -59,6 +62,7 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
this.metadataService = DefaultMetadataService.class;
this.discoveryService = GraphBackedDiscoveryService.class;
this.searchIndexer = GraphBackedSearchIndexer.class;
this.lineageService = HiveLineageService.class;
}
protected void configure() {
......@@ -86,5 +90,7 @@ public class RepositoryMetadataModule extends com.google.inject.AbstractModule {
bind(DiscoveryService.class).to(discoveryService);
bind(SearchIndexer.class).to(searchIndexer);
bind(LineageService.class).to(lineageService);
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.discovery;
import com.thinkaurelius.titan.core.TitanGraph;
import org.apache.hadoop.metadata.discovery.graph.DefaultGraphPersistenceStrategy;
import org.apache.hadoop.metadata.query.Expressions;
import org.apache.hadoop.metadata.query.GremlinQuery;
import org.apache.hadoop.metadata.query.GremlinTranslator;
import org.apache.hadoop.metadata.query.HiveLineageQuery;
import org.apache.hadoop.metadata.query.HiveWhereUsedQuery;
import org.apache.hadoop.metadata.query.QueryProcessor;
import org.apache.hadoop.metadata.repository.MetadataRepository;
import org.apache.hadoop.metadata.repository.graph.GraphProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;
import scala.collection.immutable.List;
import javax.inject.Inject;
import javax.inject.Singleton;
/**
 * Hive implementation of the Lineage service interface.
 *
 * Answers "what feeds this table" (inputs) and "what is derived from this
 * table" (outputs) by walking hive_process edges in the metadata graph.
 */
@Singleton
public class HiveLineageService implements LineageService {

    private static final Logger LOG = LoggerFactory.getLogger(HiveLineageService.class);

    // todo - externalize these into configuration
    private static final String HIVE_TABLE_TYPE_NAME = "hive_table";
    private static final String HIVE_PROCESS_TYPE_NAME = "hive_process";
    private static final String HIVE_PROCESS_INPUT_ATTRIBUTE_NAME = "inputTables";
    private static final String HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME = "outputTables";

    private final TitanGraph titanGraph;
    private final DefaultGraphPersistenceStrategy graphPersistenceStrategy;

    @Inject
    HiveLineageService(GraphProvider<TitanGraph> graphProvider,
                       MetadataRepository metadataRepository) throws DiscoveryException {
        this.titanGraph = graphProvider.get();
        this.graphPersistenceStrategy = new DefaultGraphPersistenceStrategy(metadataRepository);
    }

    /**
     * Return the lineage outputs for the given tableName.
     *
     * @param tableName tableName
     * @return Lineage Outputs as JSON
     * @throws DiscoveryException if the generated query expression is invalid
     */
    @Override
    public String getOutputs(String tableName) throws DiscoveryException {
        LOG.info("Fetching lineage outputs for tableName={}", tableName);

        try {
            HiveWhereUsedQuery outputsQuery = new HiveWhereUsedQuery(
                    HIVE_TABLE_TYPE_NAME, tableName, HIVE_PROCESS_TYPE_NAME,
                    HIVE_PROCESS_INPUT_ATTRIBUTE_NAME, HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME,
                    Option.empty(), Option.<List<String>>empty(), true,
                    graphPersistenceStrategy, titanGraph);
            validateAndLog(outputsQuery.expr());
            return outputsQuery.evaluate().toJson();
        } catch (Exception e) { // unable to catch ExpressionException
            throw new DiscoveryException("Invalid expression", e);
        }
    }

    /**
     * Return the lineage inputs for the given tableName.
     *
     * @param tableName tableName
     * @return Lineage Inputs as JSON
     * @throws DiscoveryException if the generated query expression is invalid
     */
    @Override
    public String getInputs(String tableName) throws DiscoveryException {
        LOG.info("Fetching lineage inputs for tableName={}", tableName);

        try {
            HiveLineageQuery inputsQuery = new HiveLineageQuery(
                    HIVE_TABLE_TYPE_NAME, tableName, HIVE_PROCESS_TYPE_NAME,
                    HIVE_PROCESS_INPUT_ATTRIBUTE_NAME, HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME,
                    Option.empty(), Option.<List<String>>empty(), true,
                    graphPersistenceStrategy, titanGraph);
            validateAndLog(inputsQuery.expr());
            return inputsQuery.evaluate().toJson();
        } catch (Exception e) { // unable to catch ExpressionException
            throw new DiscoveryException("Invalid expression", e);
        }
    }

    /**
     * Validates the raw query expression and, when debug logging is enabled,
     * logs the validated expression, its tree form and the Gremlin translation.
     *
     * Shared by {@link #getInputs(String)} and {@link #getOutputs(String)},
     * which previously duplicated this sequence and wrote the debug output to
     * System.out instead of the logger.
     *
     * @param expression raw lineage query expression
     * @return the validated expression
     */
    private Expressions.Expression validateAndLog(Expressions.Expression expression) {
        Expressions.Expression validatedExpression = QueryProcessor.validate(expression);
        GremlinQuery gremlinQuery = new GremlinTranslator(
                validatedExpression, graphPersistenceStrategy).translate();

        if (LOG.isDebugEnabled()) {
            LOG.debug("Query = {}", validatedExpression);
            LOG.debug("Expression Tree = {}", validatedExpression.treeString());
            LOG.debug("Gremlin Query = {}", gremlinQuery.queryStr());
        }

        return validatedExpression;
    }
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.discovery;
/**
 * Lineage service interface.
 *
 * A lineage service resolves, for a given table, the set of upstream tables
 * it was derived from (inputs) and the set of downstream tables derived from
 * it (outputs). Results are returned as JSON-encoded strings.
 */
public interface LineageService {

    /**
     * Return the lineage outputs for the given tableName.
     *
     * @param tableName tableName
     * @return Outputs as JSON
     * @throws DiscoveryException if the lineage query cannot be evaluated
     */
    String getOutputs(String tableName) throws DiscoveryException;

    /**
     * Return the lineage inputs for the given tableName.
     *
     * @param tableName tableName
     * @return Inputs as JSON
     * @throws DiscoveryException if the lineage query cannot be evaluated
     */
    String getInputs(String tableName) throws DiscoveryException;
}
......@@ -45,6 +45,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
......@@ -54,6 +55,7 @@ import java.util.Set;
* Simple wrapper over TypeSystem and MetadataRepository services with hooks
* for listening to changes to the repository.
*/
@Singleton
public class DefaultMetadataService implements MetadataService {
private static final Logger LOG =
......
......@@ -67,6 +67,11 @@
<appender-ref ref="FILE"/>
</logger>
<logger name="com.google" additivity="false">
<level value="info"/>
<appender-ref ref="FILE"/>
</logger>
<logger name="AUDIT">
<level value="info"/>
<appender-ref ref="AUDIT"/>
......
......@@ -84,8 +84,13 @@
<appender-ref ref="FILE"/>
</logger>
<logger name="com.google" additivity="false">
<level value="info"/>
<appender-ref ref="FILE"/>
</logger>
<root>
<priority value="debug"/>
<priority value="info"/>
<appender-ref ref="FILE"/>
</root>
......
......@@ -243,8 +243,7 @@
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${basedir}/conf
</outputDirectory>
<outputDirectory>${basedir}/conf</outputDirectory>
<resources>
<resource>
<directory>${project.build.directory}/conf</directory>
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metadata.web.resources;
import com.google.common.base.Preconditions;
import org.apache.hadoop.metadata.MetadataServiceClient;
import org.apache.hadoop.metadata.discovery.DiscoveryException;
import org.apache.hadoop.metadata.discovery.LineageService;
import org.apache.hadoop.metadata.web.util.Servlets;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
/**
 * Jersey Resource for Hive Table Lineage.
 *
 * Exposes the {@link LineageService} inputs/outputs queries for a table over
 * REST at {@code lineage/hive/inputs/{tableName}} and
 * {@code lineage/hive/outputs/{tableName}}.
 */
@Path("lineage/hive")
@Singleton
public class HiveLineageResource {

    private static final Logger LOG = LoggerFactory.getLogger(HiveLineageResource.class);

    private final LineageService lineageService;

    /**
     * Created by the Guice ServletModule and injected with the
     * configured LineageService.
     *
     * @param lineageService lineage service handle
     */
    @Inject
    public HiveLineageResource(LineageService lineageService) {
        this.lineageService = lineageService;
    }

    /**
     * Returns the inputs for a given entity.
     *
     * @param tableName table name
     * @return HTTP 200 with the lineage inputs as JSON; 400 on a bad query,
     *         500 on a response-serialization failure
     */
    @GET
    @Path("inputs/{tableName}")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    public Response inputs(@Context HttpServletRequest request,
                           @PathParam("tableName") String tableName) {
        Preconditions.checkNotNull(tableName, "table name cannot be null");
        LOG.info("Fetching lineage inputs for tableName={}", tableName);

        try {
            final String jsonResult = lineageService.getInputs(tableName);
            return buildResponse(tableName, jsonResult);
        } catch (DiscoveryException e) {
            LOG.error("Unable to get lineage inputs for table {}", tableName, e);
            throw new WebApplicationException(
                    Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST));
        } catch (JSONException e) {
            LOG.error("Unable to get lineage inputs for table {}", tableName, e);
            throw new WebApplicationException(
                    Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR));
        }
    }

    /**
     * Returns the outputs for a given entity.
     *
     * @param tableName table name
     * @return HTTP 200 with the lineage outputs as JSON; 400 on a bad query,
     *         500 on a response-serialization failure
     */
    @GET
    @Path("outputs/{tableName}")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    public Response outputs(@Context HttpServletRequest request,
                            @PathParam("tableName") String tableName) {
        Preconditions.checkNotNull(tableName, "table name cannot be null");
        LOG.info("Fetching lineage outputs for tableName={}", tableName);

        try {
            final String jsonResult = lineageService.getOutputs(tableName);
            return buildResponse(tableName, jsonResult);
        } catch (DiscoveryException e) {
            // fixed: previously logged "lineage inputs" here (copy-paste error)
            LOG.error("Unable to get lineage outputs for table {}", tableName, e);
            throw new WebApplicationException(
                    Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST));
        } catch (JSONException e) {
            LOG.error("Unable to get lineage outputs for table {}", tableName, e);
            throw new WebApplicationException(
                    Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR));
        }
    }

    /**
     * Wraps a lineage service JSON result into the standard response envelope
     * (request id, table name, results). Shared by both endpoints, which
     * previously duplicated this construction inline.
     *
     * @param tableName  table the lineage was computed for
     * @param jsonResult JSON string returned by the lineage service
     * @return HTTP 200 response carrying the envelope
     * @throws JSONException if jsonResult is not valid JSON
     */
    private Response buildResponse(String tableName, String jsonResult) throws JSONException {
        JSONObject response = new JSONObject();
        response.put(MetadataServiceClient.REQUEST_ID, Servlets.getRequestId());
        response.put("tableName", tableName);
        response.put(MetadataServiceClient.RESULTS, new JSONObject(jsonResult));
        return Response.ok(response).build();
    }
}
......@@ -43,12 +43,6 @@ import java.util.Map;
/**
* Jersey Resource for metadata operations.
*
* The entry point for all operations against various aspects of the entities graph.
*
* For instance,
* lineage: given an entity, X, get me the lineage - all entities X is derived from (recursively)
* 'search': find entities generated by Hive processes or that were generated by Sqoop, etc.
*/
@Path("discovery")
@Singleton
......
......@@ -64,7 +64,7 @@ public class EntityJerseyResourceIT extends BaseResourceIT {
private static final String DATABASE_TYPE = "hive_database";
private static final String DATABASE_NAME = "foo";
private static final String TABLE_TYPE = "hive_table";
private static final String TABLE_TYPE = "hive_table_type";
private static final String TABLE_NAME = "bar";
private Referenceable tableInstance;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment