Commit fd468f45 by Suma Shivaprasad

ATLAS-361 Add validation when index backends are switched in ATLAS configuration

parent bf5672c5
......@@ -41,6 +41,7 @@ atlas.graph.storage.lock.wait-time=10000
#atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr
# Graph Search Index
#ElasticSearch
atlas.graph.index.search.backend=elasticsearch
atlas.graph.index.search.directory=${sys:atlas.home}/data/es
atlas.graph.index.search.elasticsearch.client-only=false
......
......@@ -79,7 +79,39 @@ atlas.graph.index.search.elasticsearch.create.sleep=2000
atlas.graph.index.search.solr.zookeeper-url=<the ZK quorum setup for solr as comma separated value> eg: 10.1.6.4:2181,10.1.6.5:2181
</verbatim>
---+++ Choosing between Persistence and Indexing Backends
Refer http://s3.thinkaurelius.com/docs/titan/0.5.4/bdb.html and http://s3.thinkaurelius.com/docs/titan/0.5.4/hbase.html for choosing between the persistence backends.
BerkeleyDB is suitable for smaller data sets in the range of up to 10 million vertices with ACID guarantees.
HBase on the other hand doesn't provide ACID guarantees but is able to scale for larger graphs. HBase also provides HA inherently.
---+++ Choosing between Persistence Backends
Refer http://s3.thinkaurelius.com/docs/titan/0.5.4/bdb.html and http://s3.thinkaurelius.com/docs/titan/0.5.4/hbase.html for choosing between the persistence backends.
BerkeleyDB is suitable for smaller data sets in the range of up to 10 million vertices with ACID guarantees.
HBase on the other hand doesn't provide ACID guarantees but is able to scale for larger graphs. HBase also provides HA inherently.
---+++ Choosing between Indexing Backends
Refer to http://s3.thinkaurelius.com/docs/titan/0.5.4/elasticsearch.html and http://s3.thinkaurelius.com/docs/titan/0.5.4/solr.html for choosing between ElasticSearch and Solr.
Solr in cloud mode is the recommended setup.
---+++ Switching Persistence Backend
For switching the storage backend from BerkeleyDB to HBase and vice versa, refer the documentation for "Graph Persistence Engine" described above and restart ATLAS.
The data in the indexing backend needs to be cleared else there will be discrepancies between the storage and indexing backend which could result in errors during the search.
ElasticSearch runs by default in embedded mode and the data could easily be cleared by deleting the ATLAS_HOME/data/es directory.
For Solr, the collections which were created during ATLAS Installation - vertex_index, edge_index and fulltext_index - could be deleted, which will clean up the indexes.
---+++ Switching Index Backend
Switching the Index backend requires clearing the persistence backend data. Otherwise there will be discrepancies between the persistence and index backends since switching the indexing backend means index data will be lost.
This leads to "Fulltext" queries not working on the existing data.
For clearing the data for BerkeleyDB, delete the ATLAS_HOME/data/berkeley directory.
For clearing the data for HBase, in the HBase shell, run 'disable titan' and 'drop titan'.
---+++ Hive Lineage Configs
The higher layer services like hive lineage, schema, etc. are driven by the type system and this
section encodes the specific types for the hive data model.
......
......@@ -9,6 +9,7 @@ ATLAS-54 Rename configs in hive hook (shwethags)
ATLAS-3 Mixed Index creation fails with Date types (sumasai via shwethags)
ALL CHANGES:
ATLAS-361 Add validation when index backends are switched in ATLAS configuration (sumasai via shwethags)
ATLAS-171 Ability to update type definition(shwethags via sumasai)
ATLAS-352 Improve write performance on type and entity creation with Hbase (sumasai)
ATLAS-350 Document jaas config details for atlas (tbeerbower via shwethags)
......
......@@ -22,8 +22,14 @@ import com.google.common.collect.ImmutableMap;
import com.google.inject.Provides;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.TitanTransaction;
import com.thinkaurelius.titan.core.schema.TitanManagement;
import com.thinkaurelius.titan.diskstorage.Backend;
import com.thinkaurelius.titan.diskstorage.StandardIndexProvider;
import com.thinkaurelius.titan.diskstorage.indexing.IndexInformation;
import com.thinkaurelius.titan.diskstorage.solr.Solr5Index;
import com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration;
import com.thinkaurelius.titan.graphdb.database.StandardTitanGraph;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasException;
import org.apache.commons.configuration.Configuration;
......@@ -35,6 +41,7 @@ import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
* Default implementation for Graph Provider that doles out Titan Graph.
......@@ -46,7 +53,13 @@ public class TitanGraphProvider implements GraphProvider<TitanGraph> {
/**
* Constant for the configuration property that indicates the prefix.
*/
private static final String GRAPH_PREFIX = "atlas.graph";
public static final String GRAPH_PREFIX = "atlas.graph";
public static final String INDEX_BACKEND_CONF = "index.search.backend";
public static final String INDEX_BACKEND_LUCENE = "lucene";
public static final String INDEX_BACKEND_ES = "elasticsearch";
private static volatile TitanGraph graphInstance;
......@@ -85,9 +98,9 @@ public class TitanGraphProvider implements GraphProvider<TitanGraph> {
}
public static TitanGraph getGraphInstance() {
if(graphInstance == null) {
if (graphInstance == null) {
synchronized (TitanGraphProvider.class) {
if(graphInstance == null) {
if (graphInstance == null) {
Configuration config;
try {
config = getConfiguration();
......@@ -96,12 +109,33 @@ public class TitanGraphProvider implements GraphProvider<TitanGraph> {
}
graphInstance = TitanFactory.open(config);
validateIndexBackend(config);
}
}
}
return graphInstance;
}
/**
 * Shuts down the shared graph instance, if one is currently open, and drops the cached reference.
 *
 * Safe to call when no graph has been opened or when it has already been cleared; in that case
 * the call is a no-op instead of failing with a NullPointerException.
 */
public static void clear() {
    synchronized (TitanGraphProvider.class) {
        if (graphInstance == null) {
            // Nothing was opened (or clear() already ran): nothing to shut down.
            return;
        }
        try {
            graphInstance.shutdown();
        } finally {
            // Drop the reference even if shutdown fails, so a half-closed graph is never handed out again.
            graphInstance = null;
        }
    }
}
/**
 * Checks that the index backend named in the supplied configuration matches the value the open
 * graph's management system reports for the same key.
 *
 * @param config configuration from which the {@code index.search.backend} value is read
 * @throws RuntimeException if the two values differ, including when only one of them is set
 */
static void validateIndexBackend(Configuration config) {
    String configuredIndexBackend = config.getString(INDEX_BACKEND_CONF);

    TitanManagement managementSystem = graphInstance.getManagementSystem();
    String currentIndexBackend = managementSystem.get(INDEX_BACKEND_CONF);
    // Finish the management transaction before deciding, so it is not left open when we throw below.
    managementSystem.commit();

    // Null-safe comparison: a missing configured value must surface as the descriptive mismatch
    // error below rather than as a bare NullPointerException.
    boolean sameBackend = (configuredIndexBackend == null)
            ? currentIndexBackend == null
            : configuredIndexBackend.equals(currentIndexBackend);

    if (!sameBackend) {
        throw new RuntimeException("Configured Index Backend " + configuredIndexBackend + " differs from earlier configured Index Backend " + currentIndexBackend + ". Aborting!");
    }
}
@Override
@Singleton
@Provides
......
......@@ -349,10 +349,10 @@ public class DefaultMetadataService implements MetadataService {
}
/**
* Return the list of entity names for the given type in the repository.
* Return the list of entity guids for the given type in the repository.
*
* @param entityType type
* @return list of entity names for the given type in the repository
* @return list of entity guids for the given type in the repository
*/
@Override
public List<String> getEntityList(String entityType) throws AtlasException {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment