Commit 92f3d449 by nixonrodrigues Committed by Madhan Neethiraj

ATLAS-2276: update Hive hook to add an option to retain case-sensitivity of path…

ATLAS-2276: update Hive hook to add an option to retain case-sensitivity of path in hdfs_path entities. Signed-off-by: Madhan Neethiraj <madhan@apache.org>
parent 9bf0d12e
...@@ -57,6 +57,7 @@ import org.slf4j.LoggerFactory; ...@@ -57,6 +57,7 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import static org.apache.atlas.hive.hook.HiveHook.CONF_PREFIX;
/** /**
* A Bridge Utility that imports metadata from the Hive Meta Store * A Bridge Utility that imports metadata from the Hive Meta Store
...@@ -96,13 +97,19 @@ public class HiveMetaStoreBridge { ...@@ -96,13 +97,19 @@ public class HiveMetaStoreBridge {
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class); private static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreBridge.class);
public final Hive hiveClient; public final Hive hiveClient;
private AtlasClient atlasClient = null; private final AtlasClient atlasClient;
private final boolean convertHdfsPathToLowerCase;
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) { HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient) {
this.clusterName = clusterName; this(clusterName, hiveClient, atlasClient, true);
this.hiveClient = hiveClient; }
this.atlasClient = atlasClient;
HiveMetaStoreBridge(String clusterName, Hive hiveClient, AtlasClient atlasClient, boolean convertHdfsPathToLowerCase) {
this.clusterName = clusterName;
this.hiveClient = hiveClient;
this.atlasClient = atlasClient;
this.convertHdfsPathToLowerCase = convertHdfsPathToLowerCase;
} }
public String getClusterName() { public String getClusterName() {
...@@ -122,13 +129,17 @@ public class HiveMetaStoreBridge { ...@@ -122,13 +129,17 @@ public class HiveMetaStoreBridge {
* @param hiveConf {@link HiveConf} for Hive component in the cluster * @param hiveConf {@link HiveConf} for Hive component in the cluster
*/ */
public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClient atlasClient) throws Exception { public HiveMetaStoreBridge(Configuration atlasProperties, HiveConf hiveConf, AtlasClient atlasClient) throws Exception {
this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClient); this(atlasProperties.getString(HIVE_CLUSTER_NAME, DEFAULT_CLUSTER_NAME), Hive.get(hiveConf), atlasClient, atlasProperties.getBoolean(CONF_PREFIX + "hdfs_path.convert_to_lowercase", true));
} }
AtlasClient getAtlasClient() { AtlasClient getAtlasClient() {
return atlasClient; return atlasClient;
} }
public boolean isConvertHdfsPathToLowerCase() {
return convertHdfsPathToLowerCase;
}
void importHiveMetadata(boolean failOnError) throws Exception { void importHiveMetadata(boolean failOnError) throws Exception {
LOG.info("Importing hive metadata"); LOG.info("Importing hive metadata");
importDatabases(failOnError); importDatabases(failOnError);
...@@ -295,7 +306,7 @@ public class HiveMetaStoreBridge { ...@@ -295,7 +306,7 @@ public class HiveMetaStoreBridge {
Referenceable lineageProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName()); Referenceable lineageProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
ArrayList<Referenceable> sourceList = new ArrayList<>(); ArrayList<Referenceable> sourceList = new ArrayList<>();
ArrayList<Referenceable> targetList = new ArrayList<>(); ArrayList<Referenceable> targetList = new ArrayList<>();
String tableLocation = table.getDataLocation().toString(); String tableLocation = isConvertHdfsPathToLowerCase() ? HiveHook.lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
Referenceable path = fillHDFSDataSet(tableLocation); Referenceable path = fillHDFSDataSet(tableLocation);
String query = getCreateTableString(table, tableLocation); String query = getCreateTableString(table, tableLocation);
sourceList.add(path); sourceList.add(path);
......
...@@ -744,7 +744,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { ...@@ -744,7 +744,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
} else if (entity.getType() == Type.DFS_DIR) { } else if (entity.getType() == Type.DFS_DIR) {
URI location = entity.getLocation(); URI location = entity.getLocation();
if (location != null) { if (location != null) {
final String pathUri = lower(new Path(location).toString()); final String pathUri = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(new Path(location).toString()) : new Path(location).toString();
LOG.debug("Registering DFS Path {} ", pathUri); LOG.debug("Registering DFS Path {} ", pathUri);
if (!dataSetsProcessed.contains(pathUri)) { if (!dataSetsProcessed.contains(pathUri)) {
Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri); Referenceable hdfsPath = dgiBridge.fillHDFSDataSet(pathUri);
...@@ -793,7 +793,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext { ...@@ -793,7 +793,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) { if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
LOG.info("Registering external table process {} ", event.getQueryStr()); LOG.info("Registering external table process {} ", event.getQueryStr());
final String location = lower(hiveTable.getDataLocation().toString()); final String location = dgiBridge.isConvertHdfsPathToLowerCase() ? lower(hiveTable.getDataLocation().toString()) : hiveTable.getDataLocation().toString();
final ReadEntity dfsEntity = new ReadEntity(); final ReadEntity dfsEntity = new ReadEntity();
dfsEntity.setTyp(Type.DFS_DIR); dfsEntity.setTyp(Type.DFS_DIR);
dfsEntity.setD(new Path(location)); dfsEntity.setD(new Path(location));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment