Commit 46365f8c by Suma Shivaprasad

ATLAS-527 Support lineage for load table, import, export (sumasai via shwethags)

parent 009330de
@@ -17,6 +17,9 @@
  */
 package org.apache.atlas.fs.model;

+import com.sun.jersey.api.client.ClientResponse;
 import org.apache.atlas.AtlasClient;
+import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.addons.ModelDefinitionDump;
 import org.apache.atlas.typesystem.TypesDef;
 import org.apache.atlas.typesystem.json.TypesSerialization;
...
@@ -31,13 +31,10 @@ import scala.tools.scalap.scalax.rules.scalasig.ClassFileParser.EnumConstValue
  */
 object FSDataModel extends App {

-    var typesDef : TypesDef = null
-
     val typesBuilder = new TypesBuilder
     import typesBuilder._

-    typesDef = types {
+    val typesDef : TypesDef = types {

         // FS DataSet
         _class(FSDataTypes.FS_PATH.toString, List("DataSet", AtlasClient.REFERENCEABLE_SUPER_TYPE)) {
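Note: FSDataModel extends scala.App, so the object's body — and hence this val — only runs once main() is invoked; that is why the bridge code further down calls FSDataModel.main(null) before reading the model ("Trigger val definition"). A minimal sketch of that call sequence from the Java side, mirroring the model-registration hunk in HiveMetaStoreBridge below:

    // Run the App body so typesDef is populated, then serialize it.
    FSDataModel.main(null);
    String hdfsModelJson = TypesSerialization.toJson(FSDataModel.typesDef());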
...
@@ -18,18 +18,22 @@
 package org.apache.atlas.hive.bridge;

 import com.google.common.base.Joiner;
+import com.google.common.annotations.VisibleForTesting;
+import com.sun.jersey.api.client.ClientResponse;
 import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClient;
 import org.apache.atlas.AtlasConstants;
 import org.apache.atlas.AtlasServiceException;
+import org.apache.atlas.fs.model.FSDataModel;
+import org.apache.atlas.fs.model.FSDataTypes;
 import org.apache.atlas.hive.model.HiveDataModelGenerator;
 import org.apache.atlas.hive.model.HiveDataTypes;
 import org.apache.atlas.typesystem.Referenceable;
 import org.apache.atlas.typesystem.Struct;
 import org.apache.atlas.typesystem.json.InstanceSerialization;
 import org.apache.atlas.typesystem.json.TypesSerialization;
 import org.apache.commons.configuration.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -67,6 +71,9 @@ public class HiveMetaStoreBridge {
     public static final String ATLAS_ENDPOINT = "atlas.rest.address";

+    private final String doAsUser;
+    private final UserGroupInformation ugi;
+
     private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);

     public final Hive hiveClient;
@@ -82,6 +89,11 @@ public class HiveMetaStoreBridge {
         this(hiveConf, atlasConf, null, null);
     }

+    @VisibleForTesting
+    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
+        this(clusterName, hiveClient, atlasClient, null, null);
+    }
+
     public String getClusterName() {
         return clusterName;
     }
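Note: the package-private constructor above is annotated @VisibleForTesting so unit tests can inject stubs instead of wiring a real AtlasClient from configuration. A hypothetical Mockito-style usage — the mocks and cluster name are illustrative, not part of this commit:

    Hive hiveClient = Mockito.mock(Hive.class);
    AtlasClient atlasClient = Mockito.mock(AtlasClient.class);
    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge("test-cluster", hiveClient, atlasClient);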
@@ -96,21 +108,16 @@ public class HiveMetaStoreBridge {
                              UserGroupInformation ugi) throws Exception {
         this(hiveConf.get(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME),
                 Hive.get(hiveConf),
-                atlasConf, doAsUser, ugi);
-    }
-
-    HiveMetaStoreBridge(String clusterName, Hive hiveClient,
-                        Configuration atlasConf, String doAsUser, UserGroupInformation ugi) {
-        this.clusterName = clusterName;
-        this.hiveClient = hiveClient;
-        String baseUrls = atlasConf.getString(ATLAS_ENDPOINT, DEFAULT_DGI_URL);
-        this.atlasClient = new AtlasClient(ugi, doAsUser, baseUrls.split(","));
-    }
-
-    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
+                new AtlasClient(atlasConf.getString(ATLAS_ENDPOINT, DEFAULT_DGI_URL), ugi, doAsUser), doAsUser, ugi);
+    }
+
+    @VisibleForTesting
+    HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient, String user, UserGroupInformation ugi) {
         this.clusterName = clusterName;
         this.hiveClient = hiveClient;
         this.atlasClient = atlasClient;
+        this.doAsUser = user;
+        this.ugi = ugi;
     }

     private AtlasClient getAtlasClient() {
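Note: after this refactor every construction path funnels into the five-argument constructor, so doAsUser and ugi are captured exactly once, and the production path builds its AtlasClient inline from atlas.rest.address. A sketch of the production wiring, assuming Atlas's ApplicationProperties config loader:

    HiveConf hiveConf = new HiveConf();
    Configuration atlasConf = ApplicationProperties.get();  // loads atlas-application.properties
    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(hiveConf, atlasConf, doAsUser, ugi);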
@@ -306,7 +313,7 @@ public class HiveMetaStoreBridge {
     }

     private Referenceable createOrUpdateTableInstance(Referenceable dbReference, Referenceable tableReference,
-                                                      Table hiveTable) throws Exception {
+                                                      final Table hiveTable) throws Exception {
         LOG.info("Importing objects from {}.{}", hiveTable.getDbName(), hiveTable.getTableName());

         if (tableReference == null) {
@@ -348,6 +355,7 @@ public class HiveMetaStoreBridge {
         tableReference.set(TABLE_TYPE_ATTR, hiveTable.getTableType().name());
+        tableReference.set("temporary", hiveTable.isTemporary());

         return tableReference;
     }
@@ -453,6 +461,17 @@ public class HiveMetaStoreBridge {
         return sdReferenceable;
     }

+    public Referenceable fillHDFSDataSet(String pathUri) {
+        Referenceable ref = new Referenceable(FSDataTypes.HDFS_PATH().toString());
+        ref.set("path", pathUri);
+        // Path path = new Path(pathUri);
+        // ref.set("name", path.getName());
+        // TODO - Fix after ATLAS-542 to shorter Name
+        ref.set("name", pathUri);
+        ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
+        return ref;
+    }
+
     public static String getColumnQualifiedName(final String tableQualifiedName, final String colName) {
         final String[] parts = tableQualifiedName.split("@");
         final String tableName = parts[0];
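Note: fillHDFSDataSet above is the piece that makes load/import/export visible in lineage — an HDFS location becomes a DataSet entity that can appear on either side of a hive_process. A hypothetical sketch of the intended wiring (the path and tableRef are illustrative), using the PROCESS_ATTRIBUTE_INPUTS/OUTPUTS constants this commit adds to AtlasClient:

    // e.g. IMPORT TABLE: the HDFS path is the input, the Hive table the output.
    Referenceable hdfsRef = bridge.fillHDFSDataSet("hdfs://namenode:8020/tmp/export/customers");
    Referenceable process = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
    process.set(AtlasClient.PROCESS_ATTRIBUTE_INPUTS, Arrays.asList(hdfsRef));
    process.set(AtlasClient.PROCESS_ATTRIBUTE_OUTPUTS, Arrays.asList(tableRef));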
@@ -488,6 +507,21 @@ public class HiveMetaStoreBridge {
         AtlasClient dgiClient = getAtlasClient();

         try {
+            dgiClient.getType(FSDataTypes.HDFS_PATH().toString());
+            LOG.info("HDFS data model is already registered!");
+        } catch(AtlasServiceException ase) {
+            if (ase.getStatus() == ClientResponse.Status.NOT_FOUND) {
+                //Trigger val definition
+                FSDataModel.main(null);
+
+                final String hdfsModelJson = TypesSerialization.toJson(FSDataModel.typesDef());
+                //Expected in case types do not exist
+                LOG.info("Registering HDFS data model : " + hdfsModelJson);
+                dgiClient.createType(hdfsModelJson);
+            }
+        }
+
+        try {
             dgiClient.getType(HiveDataTypes.HIVE_PROCESS.getName());
             LOG.info("Hive data model is already registered!");
         } catch(AtlasServiceException ase) {
...
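Note: both registration blocks follow the same probe-then-create idiom — getType() throws AtlasServiceException, and a NOT_FOUND status is taken as the cue to register the model rather than as an error. Factored out as a sketch (this helper does not exist in the codebase):

    private void registerTypeIfAbsent(AtlasClient client, String typeName, String typesJson) throws Exception {
        try {
            client.getType(typeName);                     // probe: throws if the type is unknown
            LOG.info("{} is already registered", typeName);
        } catch (AtlasServiceException ase) {
            if (ase.getStatus() == ClientResponse.Status.NOT_FOUND) {
                client.createType(typesJson);             // first encounter: register the model
            } else {
                throw ase;                                // anything else is a real failure
            }
        }
    }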
@@ -219,8 +219,10 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
             dataSetReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, hdfsPathStr);
             dataSetReferenceable.set("path", hdfsPathStr);
             dataSetReferenceable.set("owner", stormConf.get("hdfs.kerberos.principal"));
-            final Path hdfsPath = new Path(hdfsPathStr);
-            dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName());
+            //Fix after ATLAS-542
+            // final Path hdfsPath = new Path(hdfsPathStr);
+            // dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName());
+            dataSetReferenceable.set(AtlasClient.NAME, hdfsPathStr);
             break;

         case "HiveBolt":
...
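Note: this is the same ATLAS-542 workaround as in fillHDFSDataSet above — Path.getName() would yield the short last path segment, but until ATLAS-542 lands, name is deliberately kept as the full URI so it matches qualifiedName. For reference (values are illustrative):

    final Path hdfsPath = new Path("hdfs://namenode:8020/user/storm/output");
    String shortName = hdfsPath.getName();   // "output" — the short form deferred until ATLAS-542
    String fullUri = hdfsPath.toString();    // full URI, used for both name and qualifiedName for now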
@@ -91,6 +91,9 @@ public class AtlasClient {
     public static final String REFERENCEABLE_SUPER_TYPE = "Referenceable";
     public static final String REFERENCEABLE_ATTRIBUTE_NAME = "qualifiedName";

+    public static final String PROCESS_ATTRIBUTE_INPUTS = "inputs";
+    public static final String PROCESS_ATTRIBUTE_OUTPUTS = "outputs";
+
     public static final String JSON_MEDIA_TYPE = MediaType.APPLICATION_JSON + "; charset=UTF-8";
     public static final String UNKNOWN_STATUS = "Unknown status";
...
@@ -13,6 +13,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
 ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)

 ALL CHANGES:
+ATLAS-527 Support lineage for load table, import, export (sumasai via shwethags)
 ATLAS-572 Handle secure instance of Zookeeper for leader election.(yhemanth via sumasai)
 ATLAS-605 Hook Notifications for DELETE entity needs to be supported (sumasai)
 ATLAS-607 Add Support for delete entity through a qualifiedName (sumasai via yhemanth)
...
@@ -31,6 +31,8 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.Comparator;

 public class ReservedTypesRegistrar implements IBootstrapTypesRegistrar {
@@ -48,7 +50,17 @@ public class ReservedTypesRegistrar implements IBootstrapTypesRegistrar {
             LOG.info("No types directory {} found - not registering any reserved types", typesDirName);
             return;
         }

         File[] typeDefFiles = typesDir.listFiles();
+        //TODO - Enforce a dependency order among models registered by definition and not by modifiedTime as below
+        // Workaround - Sort by modifiedTime to get the dependency of models in the right order - first hdfs, followed by hive and hive is needed by storm, falcon models.
+        // Sorting them by time will ensure the right order since the modules are in the correct order in pom.
+        Arrays.sort(typeDefFiles, new Comparator<File>() {
+            public int compare(File f1, File f2) {
+                return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified());
+            }
+        });
+
         for (File typeDefFile : typeDefFiles) {
             try {
                 String typeDefJSON = new String(Files.readAllBytes(typeDefFile.toPath()), StandardCharsets.UTF_8);
...
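Note: sorting oldest-first by lastModified() leans on the Maven module build order to stamp the hdfs model file before hive, and hive before the storm/falcon models that depend on it — a workaround, as the TODO says, not a declared dependency order. Also note typesDir.listFiles() can return null for an unreadable directory, though the existence check above makes that unlikely. On Java 8 the comparator collapses to one line — a sketch, if the anonymous class is ever replaced:

    Arrays.sort(typeDefFiles, Comparator.comparingLong(File::lastModified));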