Commit 46365f8c by Suma Shivaprasad

ATLAS-527 Support lineage for load table, import, export (sumasai via shwethags)

parent 009330de
@@ -17,6 +17,9 @@
  */
 package org.apache.atlas.fs.model;
+import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.AtlasClient;
+import org.apache.atlas.AtlasServiceException;
 import org.apache.atlas.addons.ModelDefinitionDump;
 import org.apache.atlas.typesystem.TypesDef;
 import org.apache.atlas.typesystem.json.TypesSerialization;
...
@@ -31,13 +31,10 @@ import scala.tools.scalap.scalax.rules.scalasig.ClassFileParser.EnumConstValue
  */
 object FSDataModel extends App {
-    var typesDef : TypesDef = null
     val typesBuilder = new TypesBuilder
     import typesBuilder._
-    typesDef = types {
+    val typesDef : TypesDef = types {
         // FS DataSet
         _class(FSDataTypes.FS_PATH.toString, List("DataSet", AtlasClient.REFERENCEABLE_SUPER_TYPE)) {
...
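FSDataModel is a Scala App object, so its body (and the new typesDef val) only runs once main is invoked; the bridge code added later in this commit triggers that explicitly before serializing the model. Below is a minimal sketch of how a Java caller can materialize the definitions the same way; the class name is hypothetical, and the calls mirror the registration code in HiveMetaStoreBridge further down.

```java
import org.apache.atlas.fs.model.FSDataModel;
import org.apache.atlas.typesystem.json.TypesSerialization;

public class FSModelJsonSketch {
    public static void main(String[] args) {
        // Run the App body so the typesDef val is populated (scala.App delayed initialization).
        FSDataModel.main(null);
        // Serialize the generated HDFS type definitions to JSON, as the bridge does before createType().
        String hdfsModelJson = TypesSerialization.toJson(FSDataModel.typesDef());
        System.out.println(hdfsModelJson);
    }
}
```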
@@ -18,18 +18,22 @@
 package org.apache.atlas.hive.bridge;
-import com.google.common.base.Joiner;
+import com.google.common.annotations.VisibleForTesting;
 import com.sun.jersey.api.client.ClientResponse;
 import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClient;
 import org.apache.atlas.AtlasConstants;
 import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.fs.model.FSDataModel;
+import org.apache.atlas.fs.model.FSDataTypes;
 import org.apache.atlas.hive.model.HiveDataModelGenerator;
 import org.apache.atlas.hive.model.HiveDataTypes;
 import org.apache.atlas.typesystem.Referenceable;
 import org.apache.atlas.typesystem.Struct;
 import org.apache.atlas.typesystem.json.InstanceSerialization;
+import org.apache.atlas.typesystem.json.TypesSerialization;
 import org.apache.commons.configuration.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -67,6 +71,9 @@ public class HiveMetaStoreBridge {
     public static final String ATLAS_ENDPOINT = "atlas.rest.address";
+    private final String doAsUser;
+    private final UserGroupInformation ugi;
     private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
     public final Hive hiveClient;
@@ -82,6 +89,11 @@ public class HiveMetaStoreBridge {
         this(hiveConf, atlasConf, null, null);
     }
+    @VisibleForTesting
+    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
+        this(clusterName, hiveClient, atlasClient, null, null);
+    }
     public String getClusterName() {
         return clusterName;
     }
@@ -96,21 +108,16 @@ public class HiveMetaStoreBridge {
                             UserGroupInformation ugi) throws Exception {
         this(hiveConf.get(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME),
                 Hive.get(hiveConf),
-                atlasConf, doAsUser, ugi);
-    }
-
-    HiveMetaStoreBridge(String clusterName, Hive hiveClient,
-                        Configuration atlasConf, String doAsUser, UserGroupInformation ugi) {
-        this.clusterName = clusterName;
-        this.hiveClient = hiveClient;
-        String baseUrls = atlasConf.getString(ATLAS_ENDPOINT, DEFAULT_DGI_URL);
-        this.atlasClient = new AtlasClient(ugi, doAsUser, baseUrls.split(","));
+                new AtlasClient(atlasConf.getString(ATLAS_ENDPOINT, DEFAULT_DGI_URL), ugi, doAsUser), doAsUser, ugi);
     }
-    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
+    @VisibleForTesting
+    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient, String user, UserGroupInformation ugi) {
         this.clusterName = clusterName;
         this.hiveClient = hiveClient;
         this.atlasClient = atlasClient;
+        this.doAsUser = user;
+        this.ugi = ugi;
     }
     private AtlasClient getAtlasClient() {
@@ -306,7 +313,7 @@
     }
     private Referenceable createOrUpdateTableInstance(Referenceable dbReference, Referenceable tableReference,
-                                                      Table hiveTable) throws Exception {
+                                                      final Table hiveTable) throws Exception {
         LOG.info("Importing objects from {}.{}", hiveTable.getDbName(), hiveTable.getTableName());
         if (tableReference == null) {
@@ -348,6 +355,7 @@
         tableReference.set(TABLE_TYPE_ATTR, hiveTable.getTableType().name());
         tableReference.set("temporary", hiveTable.isTemporary());
         return tableReference;
     }
@@ -453,6 +461,17 @@
         return sdReferenceable;
     }
+    public Referenceable fillHDFSDataSet(String pathUri) {
+        Referenceable ref = new Referenceable(FSDataTypes.HDFS_PATH().toString());
+        ref.set("path", pathUri);
+        // Path path = new Path(pathUri);
+        // ref.set("name", path.getName());
+        //TODO - Fix after ATLAS-542 to shorter Name
+        ref.set("name", pathUri);
+        ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
+        return ref;
+    }
     public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
         final String[] parts = tableQualifiedName.split("@");
         final String tableName = parts[0];
@@ -488,6 +507,21 @@
         AtlasClient dgiClient = getAtlasClient();
         try {
+            dgiClient.getType(FSDataTypes.HDFS_PATH().toString());
+            LOG.info("HDFS data model is already registered!");
+        } catch(AtlasServiceException ase) {
+            if (ase.getStatus() == ClientResponse.Status.NOT_FOUND) {
+                //Trigger val definition
+                FSDataModel.main(null);
+                final String hdfsModelJson = TypesSerialization.toJson(FSDataModel.typesDef());
+                //Expected in case types do not exist
+                LOG.info("Registering HDFS data model : " + hdfsModelJson);
+                dgiClient.createType(hdfsModelJson);
+            }
+        }
+        try {
             dgiClient.getType(HiveDataTypes.HIVE_PROCESS.getName());
             LOG.info("Hive data model is already registered!");
         } catch(AtlasServiceException ase) {
...
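Taken together, the new test-only constructor, fillHDFSDataSet and the HDFS model registration let callers describe load/import/export targets as hdfs_path DataSets. A rough usage sketch follows; it assumes the registration hunk above lives in registerHiveDataModel(), sits in the same package so the package-private @VisibleForTesting constructor is accessible, and uses a placeholder Atlas endpoint and cluster name.

```java
package org.apache.atlas.hive.bridge;

import org.apache.atlas.AtlasClient;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;

public class HdfsDataSetSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder endpoint; production code builds the client from ATLAS_ENDPOINT in atlasConf.
        AtlasClient atlasClient = new AtlasClient("http://localhost:21000");
        HiveMetaStoreBridge bridge =
                new HiveMetaStoreBridge("primary", Hive.get(new HiveConf()), atlasClient);

        // Registers the Hive model and, with this commit, the HDFS model if it is not yet known.
        bridge.registerHiveDataModel();

        // Build an hdfs_path DataSet referenceable for, e.g., an EXPORT target directory.
        Referenceable exportDir = bridge.fillHDFSDataSet("hdfs://localhost:8020/tmp/export/customer");
        System.out.println(exportDir);
    }
}
```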
@@ -219,8 +219,10 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
                 dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, hdfsPathStr);
                 dataSetReferenceable.set("path", hdfsPathStr);
                 dataSetReferenceable.set("owner", stormConf.get("hdfs.kerberos.principal"));
-                final Path hdfsPath = new Path(hdfsPathStr);
-                dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName());
+                //Fix after ATLAS-542
+                // final Path hdfsPath = new Path(hdfsPathStr);
+                // dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName());
+                dataSetReferenceable.set(AtlasClient.NAME, hdfsPathStr);
                 break;
             case "HiveBolt":
...
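The short display name is backed out here pending ATLAS-542, so for now the name holds the full URI. For reference only, one possible follow-up shape (not part of this commit, and ATLAS-542 may settle on something else) would keep the full URI in qualifiedName and path while deriving the display name from the last path component, essentially re-enabling the commented-out lines:

```java
import org.apache.atlas.AtlasClient;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.hadoop.fs.Path;

final class HdfsNameSketch {
    // Hypothetical ATLAS-542-style fix: full URI stays in qualifiedName/path,
    // the last path component becomes the display name.
    static void setHdfsAttributes(Referenceable dataSetReferenceable, String hdfsPathStr) {
        Path hdfsPath = new Path(hdfsPathStr);
        dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, hdfsPathStr);
        dataSetReferenceable.set("path", hdfsPathStr);
        dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName());
    }
}
```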
@@ -91,6 +91,9 @@ public class AtlasClient {
     public static final String REFERENCEABLE_SUPER_TYPE = "Referenceable";
     public static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName";
+    public static final String PROCESS_ATTRIBUTE_INPUTS = "inputs";
+    public static final String PROCESS_ATTRIBUTE_OUTPUTS = "outputs";
     public static final String JSON_MEDIA_TYPE = MediaType.APPLICATION_JSON + "; charset=UTF-8";
     public static final String UNKNOWN_STATUS = "Unknown status";
...
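The two new constants name the generic DataSet inputs/outputs attributes of process entities: lineage for load table, import and export is expressed as a hive_process whose inputs and outputs can mix hive_table and hdfs_path referenceables. A simplified sketch, assuming the attribute values are illustrative and omitting the other required process attributes (query text, timestamps, user, and so on):

```java
import java.util.Arrays;

import org.apache.atlas.AtlasClient;
import org.apache.atlas.hive.model.HiveDataTypes;
import org.apache.atlas.typesystem.Referenceable;

final class LineageSketch {
    // Connects an export's source table to its HDFS target directory via a process entity.
    static Referenceable exportProcess(Referenceable hiveTable, Referenceable hdfsTargetDir) {
        Referenceable process = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
        process.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, "export table customer@primary"); // made-up value
        process.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, Arrays.asList(hiveTable));
        process.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, Arrays.asList(hdfsTargetDir));
        return process;
    }
}
```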
@@ -13,6 +13,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
 ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
 ALL CHANGES:
+ATLAS-527 Support lineage for load table, import, export (sumasai via shwethags)
 ATLAS-572 Handle secure instance of Zookeeper for leader election.(yhemanth via sumasai)
 ATLAS-605 Hook Notifications for DELETE entity needs to be supported (sumasai)
 ATLAS-607 Add Support for delete entity through a qualifiedName (sumasai via yhemanth)
...
@@ -31,6 +31,8 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.Comparator;
 public class ReservedTypesRegistrar implements IBootstrapTypesRegistrar {
@@ -48,7 +50,17 @@ public class ReservedTypesRegistrar implements IBootstrapTypesRegistrar {
             LOG.info("No types directory {} found - not registering any reserved types", typesDirName);
             return;
         }
         File[] typeDefFiles = typesDir.listFiles();
+        //TODO - Enforce a dependency order among models registered by definition and not by modifiedTime as below
+        // Workaround - Sort by modifiedTime to get the dependency of models in the right order - first hdfs, followed by hive and hive is needed by storm, falcon models.
+        // Sorting them by time will ensure the right order since the modules are in the correct order in pom.
+        Arrays.sort(typeDefFiles, new Comparator<File>() {
+            public int compare(File f1, File f2) {
+                return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified());
+            }
+        });
         for (File typeDefFile : typeDefFiles) {
             try {
                 String typeDefJSON = new String(Files.readAllBytes(typeDefFile.toPath()), StandardCharsets.UTF_8);
...
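The sort above is an ordering workaround keyed off file modification times, which the TODO plans to replace with an explicit dependency order between the model definitions. One hypothetical way to express that ordering is sketched below; the file-name prefixes, the order itself, and the helper class are illustrative assumptions, not the project's actual plan.

```java
import java.io.File;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class ExplicitModelOrderSketch {
    // Assumed naming convention for the bundled model JSON files; purely illustrative.
    private static final List<String> MODEL_ORDER =
            Arrays.asList("hdfs_model", "hive_model", "falcon_model", "storm_model", "sqoop_model");

    static void sortByDependency(File[] typeDefFiles) {
        Arrays.sort(typeDefFiles, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                return Integer.compare(rank(f1.getName()), rank(f2.getName()));
            }
        });
    }

    private static int rank(String fileName) {
        for (int i = 0; i < MODEL_ORDER.size(); i++) {
            if (fileName.startsWith(MODEL_ORDER.get(i))) {
                return i;
            }
        }
        return MODEL_ORDER.size(); // unknown models register last
    }
}
```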