diff --git a/docs/src/site/twiki/Configuration.twiki b/docs/src/site/twiki/Configuration.twiki new file mode 100644 index 0000000..0410ce2 --- /dev/null +++ b/docs/src/site/twiki/Configuration.twiki @@ -0,0 +1,60 @@ +---+ Configuring Apache Atlas + +---++ Introduction + +All configuration in Atlas uses java properties style configuration. + +---++ Application Properties + +The main configuration file is application.properties which is in the *conf* dir at the deployed +location. It consists of the following sections: + +---+++ Graph Database Configs + +---++++ Graph persistence engine + +This section sets up the graph db - titan - to use a persistence engine. Please refer to +<a href="http://s3.thinkaurelius.com/docs/titan/0.5.4/titan-config-ref.html">link</a> for more +details. The example below uses BerkeleyDBJE. + +<verbatim> +metadata.graph.storage.backend=berkeleyje +metadata.graph.storage.directory=data/berkley +</verbatim> + +---++++ Graph Search Index +This section sets up the graph db - titan - to use an search indexing system. The example +configuration below setsup to use an embedded Elastic search indexing system. + +<verbatim> +metadata.graph.index.search.backend=elasticsearch +metadata.graph.index.search.directory=data/es +metadata.graph.index.search.elasticsearch.client-only=false +metadata.graph.index.search.elasticsearch.local-mode=true +metadata.graph.index.search.elasticsearch.create.sleep=2000 +</verbatim> + +---+++ Hive Lineage Configs +The higher layer services like hive lineage, schema, etc. are driven by the type system and this +section encodes the specific types for the hive data model. + +# This models follows the quick-start guide +<verbatim> +metadata.lineage.hive.table.type.name=hive_table +metadata.lineage.hive.table.column.name=columns +metadata.lineage.hive.process.type.name=hive_process +metadata.lineage.hive.process.inputs.name=inputTables +metadata.lineage.hive.process.outputs.name=outputTables +#Currently unused +#metadata.lineage.hive.column.type.name=Column +</verbatim> + +---+++ Security Properties + +---++++ SSL config +The following property is used to toggle the SSL feature. + +<verbatim> +metadata.enableTLS=false +</verbatim> + diff --git a/repository/src/main/java/org/apache/hadoop/metadata/discovery/HiveLineageService.java b/repository/src/main/java/org/apache/hadoop/metadata/discovery/HiveLineageService.java index 0c65dc3..6ade9eb 100644 --- a/repository/src/main/java/org/apache/hadoop/metadata/discovery/HiveLineageService.java +++ b/repository/src/main/java/org/apache/hadoop/metadata/discovery/HiveLineageService.java @@ -19,7 +19,6 @@ package org.apache.hadoop.metadata.discovery; import com.thinkaurelius.titan.core.TitanGraph; -import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.hadoop.metadata.MetadataException; import org.apache.hadoop.metadata.PropertiesUtil; @@ -116,6 +115,24 @@ public class HiveLineageService implements LineageService { } /** + * Return the lineage outputs graph for the given tableName. + * + * @param tableName tableName + * @return Outputs Graph as JSON + */ + @Override + public String getOutputsGraph(String tableName) throws DiscoveryException { + LOG.info("Fetching lineage outputs graph for tableName={}", tableName); + + HiveWhereUsedQuery outputsQuery = new HiveWhereUsedQuery( + HIVE_TABLE_TYPE_NAME, tableName, HIVE_PROCESS_TYPE_NAME, + HIVE_PROCESS_INPUT_ATTRIBUTE_NAME, HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME, + Option.empty(), SELECT_ATTRIBUTES, true, + graphPersistenceStrategy, titanGraph); + return outputsQuery.graph().toInstanceJson(); + } + + /** * Return the lineage inputs for the given tableName. * * @param tableName tableName @@ -141,6 +158,24 @@ public class HiveLineageService implements LineageService { } /** + * Return the lineage inputs graph for the given tableName. + * + * @param tableName tableName + * @return Inputs Graph as JSON + */ + @Override + public String getInputsGraph(String tableName) throws DiscoveryException { + LOG.info("Fetching lineage inputs graph for tableName={}", tableName); + + HiveLineageQuery inputsQuery = new HiveLineageQuery( + HIVE_TABLE_TYPE_NAME, tableName, HIVE_PROCESS_TYPE_NAME, + HIVE_PROCESS_INPUT_ATTRIBUTE_NAME, HIVE_PROCESS_OUTPUT_ATTRIBUTE_NAME, + Option.empty(), SELECT_ATTRIBUTES, true, + graphPersistenceStrategy, titanGraph); + return inputsQuery.graph().toInstanceJson(); + } + + /** * Return the schema for the given tableName. * * @param tableName tableName diff --git a/repository/src/main/java/org/apache/hadoop/metadata/discovery/LineageService.java b/repository/src/main/java/org/apache/hadoop/metadata/discovery/LineageService.java index f2638e5..72e4ff9 100644 --- a/repository/src/main/java/org/apache/hadoop/metadata/discovery/LineageService.java +++ b/repository/src/main/java/org/apache/hadoop/metadata/discovery/LineageService.java @@ -32,6 +32,14 @@ public interface LineageService { String getOutputs(String tableName) throws DiscoveryException; /** + * Return the lineage outputs graph for the given tableName. + * + * @param tableName tableName + * @return Outputs Graph as JSON + */ + String getOutputsGraph(String tableName) throws DiscoveryException; + + /** * Return the lineage inputs for the given tableName. * * @param tableName tableName @@ -40,6 +48,14 @@ public interface LineageService { String getInputs(String tableName) throws DiscoveryException; /** + * Return the lineage inputs graph for the given tableName. + * + * @param tableName tableName + * @return Inputs Graph as JSON + */ + String getInputsGraph(String tableName) throws DiscoveryException; + + /** * Return the schema for the given tableName. * * @param tableName tableName diff --git a/webapp/src/main/java/org/apache/hadoop/metadata/web/resources/HiveLineageResource.java b/webapp/src/main/java/org/apache/hadoop/metadata/web/resources/HiveLineageResource.java index 108b4a9..a58cccc 100644 --- a/webapp/src/main/java/org/apache/hadoop/metadata/web/resources/HiveLineageResource.java +++ b/webapp/src/main/java/org/apache/hadoop/metadata/web/resources/HiveLineageResource.java @@ -93,6 +93,40 @@ public class HiveLineageResource { } /** + * Returns the inputs graph for a given entity. + * + * @param tableName table name + */ + @GET + @Path("table/{tableName}/inputs/graph") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response inputsGraph(@Context HttpServletRequest request, + @PathParam("tableName") String tableName) { + Preconditions.checkNotNull(tableName, "table name cannot be null"); + LOG.info("Fetching lineage inputs graph for tableName={}", tableName); + + try { + final String jsonResult = lineageService.getInputsGraph(tableName); + + JSONObject response = new JSONObject(); + response.put(MetadataServiceClient.REQUEST_ID, Servlets.getRequestId()); + response.put("tableName", tableName); + response.put(MetadataServiceClient.RESULTS, new JSONObject(jsonResult)); + + return Response.ok(response).build(); + } catch (DiscoveryException e) { + LOG.error("Unable to get lineage inputs graph for table {}", tableName, e); + throw new WebApplicationException( + Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST)); + } catch (JSONException e) { + LOG.error("Unable to get lineage inputs graph for table {}", tableName, e); + throw new WebApplicationException( + Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR)); + } + } + + /** * Returns the outputs for a given entity. * * @param tableName table name @@ -117,11 +151,45 @@ public class HiveLineageResource { return Response.ok(response).build(); } catch (DiscoveryException e) { - LOG.error("Unable to get lineage inputs for table {}", tableName, e); + LOG.error("Unable to get lineage outputs for table {}", tableName, e); throw new WebApplicationException( Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST)); } catch (JSONException e) { - LOG.error("Unable to get lineage inputs for table {}", tableName, e); + LOG.error("Unable to get lineage outputs for table {}", tableName, e); + throw new WebApplicationException( + Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR)); + } + } + + /** + * Returns the outputs graph for a given entity. + * + * @param tableName table name + */ + @GET + @Path("table/{tableName}/outputs/graph") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response outputsGraph(@Context HttpServletRequest request, + @PathParam("tableName") String tableName) { + Preconditions.checkNotNull(tableName, "table name cannot be null"); + LOG.info("Fetching lineage outputs graph for tableName={}", tableName); + + try { + final String jsonResult = lineageService.getOutputs(tableName); + + JSONObject response = new JSONObject(); + response.put(MetadataServiceClient.REQUEST_ID, Servlets.getRequestId()); + response.put("tableName", tableName); + response.put(MetadataServiceClient.RESULTS, new JSONObject(jsonResult)); + + return Response.ok(response).build(); + } catch (DiscoveryException e) { + LOG.error("Unable to get lineage outputs graph for table {}", tableName, e); + throw new WebApplicationException( + Servlets.getErrorResponse(e, Response.Status.BAD_REQUEST)); + } catch (JSONException e) { + LOG.error("Unable to get lineage outputs graph for table {}", tableName, e); throw new WebApplicationException( Servlets.getErrorResponse(e, Response.Status.INTERNAL_SERVER_ERROR)); }