Commit aa15cd0a by Suma Shivaprasad

ATLAS-1139 Parameter name of a HDFS DataSet entity should contain filesystem path

ATLAS-1139 Parameter name of a HDFS DataSet entity should contain filesystem path (svimal2106 via sumasai)
parent fdf97ae4
...@@ -46,6 +46,7 @@ import org.apache.falcon.entity.v0.process.Input; ...@@ -46,6 +46,7 @@ import org.apache.falcon.entity.v0.process.Input;
import org.apache.falcon.entity.v0.process.Output; import org.apache.falcon.entity.v0.process.Output;
import org.apache.falcon.entity.v0.process.Workflow; import org.apache.falcon.entity.v0.process.Workflow;
import org.apache.falcon.workflow.WorkflowExecutionArgs; import org.apache.falcon.workflow.WorkflowExecutionArgs;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -323,7 +324,8 @@ public class FalconBridge { ...@@ -323,7 +324,8 @@ public class FalconBridge {
// Path path = new Path(pathUri); // Path path = new Path(pathUri);
// ref.set("name", path.getName()); // ref.set("name", path.getName());
//TODO - Fix after ATLAS-542 to shorter Name //TODO - Fix after ATLAS-542 to shorter Name
ref.set(AtlasClient.NAME, pathUri); Path path = new Path(pathUri);
ref.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri); ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
ref.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName); ref.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, clusterName);
entities.add(ref); entities.add(ref);
......
...@@ -563,7 +563,7 @@ public class HiveMetaStoreBridge { ...@@ -563,7 +563,7 @@ public class HiveMetaStoreBridge {
Referenceable ref = new Referenceable(FSDataTypes.HDFS_PATH().toString()); Referenceable ref = new Referenceable(FSDataTypes.HDFS_PATH().toString());
ref.set("path", pathUri); ref.set("path", pathUri);
Path path = new Path(pathUri); Path path = new Path(pathUri);
ref.set(AtlasClient.NAME, path.getName()); ref.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri); ref.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, pathUri);
return ref; return ref;
} }
......
...@@ -218,13 +218,14 @@ public class HiveITBase { ...@@ -218,13 +218,14 @@ public class HiveITBase {
List<Id> hdfsPathRefs = (List<Id>) processReference.get(attributeName); List<Id> hdfsPathRefs = (List<Id>) processReference.get(attributeName);
for (int i = 0; i < testPaths.length; i++) { for (int i = 0; i < testPaths.length; i++) {
final String testPathNormed = lower(new Path(testPaths[i]).toString()); final Path path = new Path(testPaths[i]);
final String testPathNormed = lower(path.toString());
String hdfsPathId = assertHDFSPathIsRegistered(testPathNormed); String hdfsPathId = assertHDFSPathIsRegistered(testPathNormed);
Assert.assertEquals(hdfsPathRefs.get(0)._getId(), hdfsPathId); Assert.assertEquals(hdfsPathRefs.get(0)._getId(), hdfsPathId);
Referenceable hdfsPathRef = atlasClient.getEntity(hdfsPathId); Referenceable hdfsPathRef = atlasClient.getEntity(hdfsPathId);
Assert.assertEquals(hdfsPathRef.get("path"), testPathNormed); Assert.assertEquals(hdfsPathRef.get("path"), testPathNormed);
Assert.assertEquals(hdfsPathRef.get(NAME), new Path(testPathNormed).getName()); Assert.assertEquals(hdfsPathRef.get(NAME), Path.getPathWithoutSchemeAndAuthority(path).toString().toLowerCase());
Assert.assertEquals(hdfsPathRef.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), testPathNormed); Assert.assertEquals(hdfsPathRef.get(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME), testPathNormed);
} }
} }
......
...@@ -119,8 +119,8 @@ public class SqoopHookIT { ...@@ -119,8 +119,8 @@ public class SqoopHookIT {
private String assertHiveTableIsRegistered(String dbName, String tableName) throws Exception { private String assertHiveTableIsRegistered(String dbName, String tableName) throws Exception {
LOG.debug("Searching for table {}.{}", dbName, tableName); LOG.debug("Searching for table {}.{}", dbName, tableName);
String query = String.format( String query = String.format(
"%s as t where " + AtlasClient.NAME + " = '%s', db where " + AtlasClient.NAME + " = '%s' and clusterName = '%s'" + " select t", "%s as t where " + AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME + " = '%s', db where " + AtlasClient.NAME + " = '%s' and clusterName = '%s'" + " select t",
HiveDataTypes.HIVE_TABLE.getName(), tableName.toLowerCase(), dbName.toLowerCase(), CLUSTER_NAME); HiveDataTypes.HIVE_TABLE.getName(), HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName), dbName.toLowerCase(), CLUSTER_NAME);
return assertEntityIsRegistered(query); return assertEntityIsRegistered(query);
} }
......
...@@ -223,7 +223,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook { ...@@ -223,7 +223,7 @@ public class StormAtlasHook extends AtlasHook implements ISubmitterHook {
dataSetReferenceable.set("path", hdfsPathStr); dataSetReferenceable.set("path", hdfsPathStr);
dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("hdfs.kerberos.principal")); dataSetReferenceable.set(AtlasClient.OWNER, stormConf.get("hdfs.kerberos.principal"));
final Path hdfsPath = new Path(hdfsPathStr); final Path hdfsPath = new Path(hdfsPathStr);
dataSetReferenceable.set(AtlasClient.NAME, hdfsPath.getName()); dataSetReferenceable.set(AtlasClient.NAME, Path.getPathWithoutSchemeAndAuthority(hdfsPath).toString().toLowerCase());
break; break;
case "HiveBolt": case "HiveBolt":
......
...@@ -9,6 +9,7 @@ ATLAS-1060 Add composite indexes for exact match performance improvements for al ...@@ -9,6 +9,7 @@ ATLAS-1060 Add composite indexes for exact match performance improvements for al
ATLAS-1127 Modify creation and modification timestamps to Date instead of Long(sumasai) ATLAS-1127 Modify creation and modification timestamps to Date instead of Long(sumasai)
ALL CHANGES: ALL CHANGES:
ATLAS-1139 Parameter name of a HDFS DataSet entity should contain filesystem path (svimal2106 via sumasai)
ATLAS-1200 Error Catalog enhancement (apoorvnaik via sumasai) ATLAS-1200 Error Catalog enhancement (apoorvnaik via sumasai)
ATLAS-1207 Dataset exists query in lineage APIs takes longer (shwethags) ATLAS-1207 Dataset exists query in lineage APIs takes longer (shwethags)
ATLAS-1232 added preCreate(), preDelete() in typedef persistence, to enable edge creation for references in a later stage (mneethiraj) ATLAS-1232 added preCreate(), preDelete() in typedef persistence, to enable edge creation for references in a later stage (mneethiraj)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment