Commit 76b05603 by Suma Shivaprasad

ATLAS-948 import-hive should allow an option to continue after failure (sumasai)

parent 377bc635
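This change threads a failOnError flag from the command-line entry point down to the per-table import loop in HiveMetaStoreBridge. A minimal sketch of the two behaviours the flag enables, using only the signatures introduced in the diff below (client and Hive setup are elided and assumed):

    // assumes atlasClient has already been constructed as in main() below
    HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(new HiveConf(), atlasClient);
    bridge.registerHiveDataModel();

    // fail-fast: the first table whose registration fails aborts the import
    bridge.importHiveMetadata(true);

    // best-effort: per-table failures are logged and the remaining tables are still imported
    bridge.importHiveMetadata(false);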
@@ -33,6 +33,10 @@ import org.apache.atlas.typesystem.json.InstanceSerialization;
import org.apache.atlas.typesystem.json.TypesSerialization;
import org.apache.atlas.typesystem.persistence.Id;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Options;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.fs.Path;
@@ -111,17 +115,16 @@ public class HiveMetaStoreBridge {
        return atlasClient;
    }

    void importHiveMetadata(boolean failOnError) throws Exception {
        LOG.info("Importing hive metadata");
        importDatabases(failOnError);
    }

    private void importDatabases(boolean failOnError) throws Exception {
        List<String> databases = hiveClient.getAllDatabases();
        for (String databaseName : databases) {
            Referenceable dbReference = registerDatabase(databaseName);
            importTables(dbReference, databaseName, failOnError);
        }
    }
@@ -254,52 +257,68 @@ public class HiveMetaStoreBridge {
    /**
     * Imports all tables for the given db
     * @param databaseReferenceable
     * @param databaseName
     * @param failOnError
     * @throws Exception
     */
    private int importTables(Referenceable databaseReferenceable, String databaseName, final boolean failOnError) throws Exception {
        int tablesImported = 0;
        List<String> hiveTables = hiveClient.getAllTables(databaseName);
        LOG.info("Importing tables {} for db {}", hiveTables.toString(), databaseName);

        for (String tableName : hiveTables) {
            try {
                Table table = hiveClient.getTable(databaseName, tableName);
                Referenceable tableReferenceable = registerTable(databaseReferenceable, table);
                tablesImported++;

                if (table.getTableType() == TableType.EXTERNAL_TABLE) {
                    String tableQualifiedName = getTableQualifiedName(clusterName, table);
                    Referenceable process = getProcessReference(tableQualifiedName);

                    if (process == null) {
                        LOG.info("Attempting to register create table process for {}", tableQualifiedName);
                        Referenceable lineageProcess = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
                        ArrayList<Referenceable> sourceList = new ArrayList<>();
                        ArrayList<Referenceable> targetList = new ArrayList<>();
                        String tableLocation = table.getDataLocation().toString();
                        Referenceable path = fillHDFSDataSet(tableLocation);
                        String query = getCreateTableString(table, tableLocation);
                        sourceList.add(path);
                        targetList.add(tableReferenceable);
                        lineageProcess.set("inputs", sourceList);
                        lineageProcess.set("outputs", targetList);
                        lineageProcess.set("userName", table.getOwner());
                        lineageProcess.set("startTime", new Date(System.currentTimeMillis()));
                        lineageProcess.set("endTime", new Date(System.currentTimeMillis()));
                        lineageProcess.set("operationType", "CREATETABLE");
                        lineageProcess.set("queryText", query);
                        lineageProcess.set("queryId", query);
                        lineageProcess.set("queryPlan", "{}");
                        lineageProcess.set("clusterName", clusterName);
                        List<String> recentQueries = new ArrayList<>(1);
                        recentQueries.add(query);
                        lineageProcess.set("recentQueries", recentQueries);
                        lineageProcess.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, tableQualifiedName);
                        lineageProcess.set(AtlasClient.NAME, query);
                        registerInstance(lineageProcess);
                    } else {
                        LOG.info("Process {} is already registered", process.toString());
                    }
                }
            } catch (Exception e) {
                LOG.error("Import failed for hive_table {} ", tableName, e);
                if (failOnError) {
                    throw e;
                }
            }
        }

        if (tablesImported == hiveTables.size()) {
            LOG.info("Successfully imported all {} tables from {} ", tablesImported, databaseName);
        } else {
            LOG.error("Unable to import {} tables out of {} tables from {}", tablesImported, hiveTables.size(), databaseName);
        }

        return tablesImported;
    }

    /**
@@ -618,7 +637,7 @@ public class HiveMetaStoreBridge {
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration atlasConf = ApplicationProperties.get();
        String atlasEndpoint = atlasConf.getString(ATLAS_ENDPOINT, DEFAULT_DGI_URL);
@@ -632,8 +651,17 @@ public class HiveMetaStoreBridge {
            atlasClient = new AtlasClient(ugi, ugi.getShortUserName(), atlasEndpoint);
        }

        Options options = new Options();
        CommandLineParser parser = new BasicParser();
        CommandLine cmd = parser.parse(options, args);

        boolean failOnError = false;
        if (cmd.hasOption("failOnError")) {
            failOnError = true;
        }

        HiveMetaStoreBridge hiveMetaStoreBridge = new HiveMetaStoreBridge(new HiveConf(), atlasClient);
        hiveMetaStoreBridge.registerHiveDataModel();
        hiveMetaStoreBridge.importHiveMetadata(failOnError);
    }
}
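For commons-cli to accept the failOnError switch, the option would typically be registered on the Options instance before parsing. A minimal sketch using the commons-cli 1.x classes imported above (the description string is an assumption, not part of this commit):

    // hedged sketch: declare the zero-argument switch so BasicParser recognizes "-failOnError"
    Options options = new Options();
    options.addOption("failOnError", false, "abort the import when a table fails to register");

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);
    boolean failOnError = cmd.hasOption("failOnError");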
@@ -41,6 +41,8 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import scala.actors.threadpool.Arrays;

import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.List;

import static org.mockito.Mockito.argThat;
@@ -78,7 +80,7 @@ public class HiveMetaStoreBridgeTest {
        returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);

        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
        bridge.importHiveMetadata(true);

        // verify update is called
        verify(atlasClient).updateEntity(eq("72e06b34-9151-4023-aa9d-b82103a50e76"),
@@ -90,7 +92,7 @@ public class HiveMetaStoreBridgeTest {
    public void testImportThatUpdatesRegisteredTable() throws Exception {
        setupDB(hiveClient, TEST_DB_NAME);

        List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);

        returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
@@ -99,12 +101,12 @@ public class HiveMetaStoreBridgeTest {
                HiveDataTypes.HIVE_TABLE.getName(), false))).thenReturn(
                getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));
        when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());

        String processQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, hiveTables.get(0));
        when(atlasClient.searchByDSL(HiveMetaStoreBridge.getProcessDSLQuery(HiveDataTypes.HIVE_PROCESS.getName(),
                processQualifiedName))).thenReturn(getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));

        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
        bridge.importHiveMetadata(true);

        // verify update is called on table
        verify(atlasClient).updateEntity(eq("82e06b34-9151-4023-aa9d-b82103a50e77"),
@@ -119,11 +121,15 @@ public class HiveMetaStoreBridgeTest {
                getEntityReference("72e06b34-9151-4023-aa9d-b82103a50e76"));
    }

    private List<Table> setupTables(Hive hiveClient, String databaseName, String... tableNames) throws HiveException {
        List<Table> tables = new ArrayList<>();
        when(hiveClient.getAllTables(databaseName)).thenReturn(Arrays.asList(tableNames));
        for (String tableName : tableNames) {
            Table testTable = createTestTable(databaseName, tableName);
            when(hiveClient.getTable(databaseName, tableName)).thenReturn(testTable);
            tables.add(testTable);
        }
        return tables;
    }

    private void setupDB(Hive hiveClient, String databaseName) throws HiveException {
@@ -135,7 +141,8 @@ public class HiveMetaStoreBridgeTest {
    @Test
    public void testImportWhenPartitionKeysAreNull() throws Exception {
        setupDB(hiveClient, TEST_DB_NAME);
        List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME);
        Table hiveTable = hiveTables.get(0);

        returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
@@ -157,12 +164,65 @@ public class HiveMetaStoreBridgeTest {
        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
        try {
            bridge.importHiveMetadata(true);
        } catch (Exception e) {
            Assert.fail("Partition with null key caused import to fail with exception ", e);
        }
    }
    @Test
    public void testImportContinuesWhenTableRegistrationFails() throws Exception {
        setupDB(hiveClient, TEST_DB_NAME);
        final String table2Name = TEST_TABLE_NAME + "_1";
        List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name);

        returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
        when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore"));

        when(atlasClient.searchByDSL(HiveMetaStoreBridge.getTableDSLQuery(CLUSTER_NAME, TEST_DB_NAME,
                table2Name,
                HiveDataTypes.HIVE_TABLE.getName(), false))).thenReturn(
                getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));
        when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());

        String processQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, hiveTables.get(1));
        when(atlasClient.searchByDSL(HiveMetaStoreBridge.getProcessDSLQuery(HiveDataTypes.HIVE_PROCESS.getName(),
                processQualifiedName))).thenReturn(getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));

        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
        try {
            bridge.importHiveMetadata(false);
        } catch (Exception e) {
            Assert.fail("Table registration failed with exception", e);
        }
    }

    @Test
    public void testImportFailsWhenTableRegistrationFails() throws Exception {
        setupDB(hiveClient, TEST_DB_NAME);
        final String table2Name = TEST_TABLE_NAME + "_1";
        List<Table> hiveTables = setupTables(hiveClient, TEST_DB_NAME, TEST_TABLE_NAME, table2Name);

        returnExistingDatabase(TEST_DB_NAME, atlasClient, CLUSTER_NAME);
        when(hiveClient.getTable(TEST_DB_NAME, TEST_TABLE_NAME)).thenThrow(new RuntimeException("Timeout while reading data from hive metastore"));

        when(atlasClient.searchByDSL(HiveMetaStoreBridge.getTableDSLQuery(CLUSTER_NAME, TEST_DB_NAME,
                table2Name,
                HiveDataTypes.HIVE_TABLE.getName(), false))).thenReturn(
                getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));
        when(atlasClient.getEntity("82e06b34-9151-4023-aa9d-b82103a50e77")).thenReturn(createTableReference());

        String processQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, hiveTables.get(1));
        when(atlasClient.searchByDSL(HiveMetaStoreBridge.getProcessDSLQuery(HiveDataTypes.HIVE_PROCESS.getName(),
                processQualifiedName))).thenReturn(getEntityReference("82e06b34-9151-4023-aa9d-b82103a50e77"));

        HiveMetaStoreBridge bridge = new HiveMetaStoreBridge(CLUSTER_NAME, hiveClient, atlasClient);
        try {
            bridge.importHiveMetadata(true);
            Assert.fail("Table registration is supposed to fail");
        } catch (Exception e) {
            //Expected
        }
    }
    private JSONArray getEntityReference(String id) throws JSONException {
        return new JSONArray(String.format("[{\"$id$\":{\"id\":\"%s\"}}]", id));
    }
...
@@ -6,6 +6,7 @@ INCOMPATIBLE CHANGES:

ALL CHANGES:
ATLAS-948 import-hive should allow an option to continue after failure (sumasai)
ATLAS-954 Get hadoop classpath if command hadoop is in PATH (svimal2106 via sumasai)
ATLAS-919 UI : Deleted references should be shown in red or filtered out (kevalbhatt18 via sumasai)
ATLAS-927 aboutAtlas_tmpl.html has hard-coded project version (Kalyanikashikar via yhemanth)
...