Commit e4ffcf24 by Pierre Padovani Committed by Madhan Neethiraj

ATLAS-2478: updates to support Elasticsearch 5.6.4 index backend (Tech Preview)

parent 8f981330
......@@ -18,6 +18,10 @@
package org.apache.atlas.repository;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasException;
import org.apache.commons.configuration.Configuration;
/**
* Repository Constants.
*
......@@ -52,12 +56,12 @@ public final class Constants {
/**
* Properties for type store graph.
*/
public static final String TYPE_CATEGORY_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type.category";
public static final String VERTEX_TYPE_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type";
public static final String TYPENAME_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type.name";
public static final String TYPEDESCRIPTION_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type.description";
public static final String TYPEVERSION_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type.version";
public static final String TYPEOPTIONS_PROPERTY_KEY = INTERNAL_PROPERTY_KEY_PREFIX + "type.options";
public static final String TYPE_CATEGORY_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type.category");
public static final String VERTEX_TYPE_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type");
public static final String TYPENAME_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type.name");
public static final String TYPEDESCRIPTION_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type.description");
public static final String TYPEVERSION_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type.version");
public static final String TYPEOPTIONS_PROPERTY_KEY = getTypePropertyKey(INTERNAL_PROPERTY_KEY_PREFIX + "type.options");
// relationship def constants
public static final String RELATIONSHIPTYPE_END1_KEY = "endDef1";
......@@ -135,4 +139,19 @@ public final class Constants {
// Utility class holding only static constants — private constructor
// prevents instantiation.
private Constants() {
}
/**
 * Resolves the property key to use for type-system metadata, adjusted for
 * the configured index-search backend.
 *
 * When the index backend is "elasticsearch", '.' characters in the key are
 * mapped to '_' (the Elasticsearch backend used here does not accept dotted
 * field names — see ATLAS-2478); for any other backend the key is returned
 * unchanged.
 *
 * @param defaultKey the default property key (used for Solr and other backends)
 * @return the backend-appropriate property key; never null if defaultKey is non-null
 */
private static String getTypePropertyKey(String defaultKey) {
    try {
        Configuration configuration = ApplicationProperties.get();

        // "elasticsearch".equals(...) is null-safe even if the key exists
        // with no value; replace(char, char) avoids compiling a regex as
        // replaceAll("\\.", "_") would.
        if (configuration.containsKey("atlas.graph.index.search.backend") &&
            "elasticsearch".equals(configuration.getString("atlas.graph.index.search.backend"))) {
            return defaultKey.replace('.', '_');
        }

        return defaultKey;
    } catch (AtlasException e) {
        // Best-effort: if configuration cannot be loaded, fall back to the
        // default key rather than failing static initialization of Constants.
        return defaultKey;
    }
}
}
......@@ -58,10 +58,25 @@ atlas.graph.index.search.solr.wait-searcher=true
#Solr http mode properties
#atlas.graph.index.search.solr.mode=http
#atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr
# ElasticSearch support (Tech Preview)
# Comment out above solr configuration, and uncomment the following two lines. Additionally, make sure the
# hostname field is set to a comma delimited set of elasticsearch master nodes, or an ELB that fronts the masters.
#
# Elasticsearch does not provide authentication out of the box, but does provide an option with the X-Pack product
# https://www.elastic.co/products/x-pack/security
#
# Alternatively, the JanusGraph documentation provides some tips on how to secure Elasticsearch without additional
# plugins: http://docs.janusgraph.org/latest/elasticsearch.html
#atlas.graph.index.hostname=localhost
#atlas.graph.index.search.elasticsearch.client-only=true
</graph.index.properties>
<hbase.embedded>false</hbase.embedded>
<solr.embedded>false</solr.embedded>
<entity.repository.properties>atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.HBaseBasedAuditRepository</entity.repository.properties>
<cassandra.embedded>false</cassandra.embedded>
<elasticsearch.managed>false</elasticsearch.managed>
<entity.repository.properties>atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.HBaseBasedAuditRepository</entity.repository.properties>
</properties>
<profiles>
......@@ -148,13 +163,50 @@ atlas.graph.storage.lock.wait-time=300
</graph.storage.properties>
<graph.index.backend>elasticsearch</graph.index.backend>
<graph.index.properties>#ElasticSearch
atlas.graph.index.search.directory=${sys:atlas.home}/data/es
atlas.graph.index.search.elasticsearch.client-only=false
atlas.graph.index.search.elasticsearch.local-mode=true
atlas.graph.index.search.elasticsearch.create.sleep=2000
atlas.graph.index.hostname=localhost
atlas.graph.index.search.elasticsearch.client-only=true
</graph.index.properties>
<entity.repository.properties>atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.NoopEntityAuditRepository</entity.repository.properties>
<elasticsearch.managed>true</elasticsearch.managed>
<cassandra.embedded>false</cassandra.embedded>
<elasticsearch.dir>${project.build.directory}/elasticsearch</elasticsearch.dir>
<elasticsearch.tar>https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${elasticsearch.version}.tar.gz</elasticsearch.tar>
<elasticsearch.folder>elasticsearch-${elasticsearch.version}</elasticsearch.folder>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
<executions>
<!-- package elasticsearch -->
<execution>
<id>elasticsearch</id>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target name="Download Elasticsearch">
<mkdir dir="${elasticsearch.dir}" />
<mkdir dir="${project.basedir}/elasticsearch" />
<get src="${elasticsearch.tar}" dest="${project.basedir}/elasticsearch/${elasticsearch.folder}.tar.gz" usetimestamp="true" verbose="true" skipexisting="true" />
<untar src="${project.basedir}/elasticsearch/${elasticsearch.folder}.tar.gz" dest="${project.build.directory}/elasticsearch.temp" compression="gzip" />
<copy todir="${elasticsearch.dir}">
<fileset dir="${project.build.directory}/elasticsearch.temp/${elasticsearch.folder}">
<include name="**/*" />
</fileset>
</copy>
</target>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<!-- profile to configure external hbase and solr with the distribution -->
......
......@@ -47,6 +47,7 @@ HBASE_CONF_DIR = "HBASE_CONF_DIR"
MANAGE_LOCAL_HBASE = "MANAGE_LOCAL_HBASE"
MANAGE_LOCAL_SOLR = "MANAGE_LOCAL_SOLR"
MANAGE_EMBEDDED_CASSANDRA = "MANAGE_EMBEDDED_CASSANDRA"
MANAGE_LOCAL_ELASTICSEARCH = "MANAGE_LOCAL_ELASTICSEARCH"
SOLR_BIN = "SOLR_BIN"
SOLR_CONF = "SOLR_CONF"
SOLR_PORT = "SOLR_PORT"
......@@ -58,7 +59,7 @@ DEFAULT_SOLR_REPLICATION_FACTOR = "1"
ENV_KEYS = ["JAVA_HOME", ATLAS_OPTS, ATLAS_SERVER_OPTS, ATLAS_SERVER_HEAP, ATLAS_LOG, ATLAS_PID, ATLAS_CONF,
"ATLASCPPATH", ATLAS_DATA, ATLAS_HOME, ATLAS_WEBAPP, HBASE_CONF_DIR, SOLR_PORT, MANAGE_LOCAL_HBASE,
MANAGE_LOCAL_SOLR, MANAGE_EMBEDDED_CASSANDRA]
MANAGE_LOCAL_SOLR, MANAGE_EMBEDDED_CASSANDRA, MANAGE_LOCAL_ELASTICSEARCH]
IS_WINDOWS = platform.system() == "Windows"
ON_POSIX = 'posix' in sys.builtin_module_names
CONF_FILE="atlas-application.properties"
......@@ -107,6 +108,9 @@ def zookeeperBinDir(dir):
def solrBinDir(dir):
    # Solr bin directory: honor a SOLR_BIN environment override, otherwise
    # default to <dir>/solr/bin.
    default_path = os.path.join(dir, "solr", BIN)
    return os.environ.get(SOLR_BIN, default_path)
def elasticsearchBinDir(dir):
    # Bug fix: this previously consulted the SOLR_BIN environment variable
    # (copy-paste from solrBinDir), so a configured SOLR_BIN would wrongly
    # redirect the Elasticsearch bin directory to Solr's. Use a dedicated
    # ELASTICSEARCH_BIN override instead, falling back to <dir>/elasticsearch/bin.
    return os.environ.get("ELASTICSEARCH_BIN", os.path.join(dir, "elasticsearch", BIN))
def solrConfDir(dir):
    # Solr configset directory: honor a SOLR_CONF environment override,
    # otherwise default to <dir>/solr/<CONFIG_SETS_CONF>.
    default_path = os.path.join(dir, "solr", CONFIG_SETS_CONF)
    return os.environ.get(SOLR_CONF, default_path)
......@@ -448,6 +452,12 @@ def is_solr_local(confdir):
confdir = os.path.join(confdir, CONF_FILE)
return grep(confdir, SOLR_INDEX_CONF_ENTRY) is not None and grep(confdir, SOLR_INDEX_LOCAL_CONF_ENTRY) is not None
def is_elasticsearch_local():
    # A locally-managed Elasticsearch is enabled only when the
    # MANAGE_LOCAL_ELASTICSEARCH environment variable is set to something
    # other than "false" (case-insensitive); when unset it defaults to "False".
    flag = os.environ.get(MANAGE_LOCAL_ELASTICSEARCH, "False")
    return flag.lower() != 'false'
def get_solr_zk_url(confdir):
    # Read the Solr ZooKeeper URL from atlas-application.properties in confdir.
    props_file = os.path.join(confdir, CONF_FILE)
    return getConfig(props_file, SOLR_INDEX_ZK_URL)
......@@ -520,6 +530,19 @@ def run_zookeeper(dir, action, logdir = None, wait=True):
return runProcess(cmd, logdir, False, wait)
def start_elasticsearch(dir, logdir = None, wait=True):
    # Launch a local Elasticsearch daemon (-d) from the given bin directory,
    # writing its pid file into logdir, then return the runProcess() result.
    # NOTE(review): logdir defaults to None but is passed to os.path.join —
    # callers appear to always supply it; confirm before relying on the default.
    script_name = "elasticsearch.bat" if IS_WINDOWS else "elasticsearch"
    pid_file = os.path.join(logdir, 'elasticsearch.pid')
    cmd = [os.path.join(dir, script_name), '-d', '-p', pid_file]
    result = runProcess(cmd, logdir, False, wait)
    # Give the daemon a moment to come up before callers proceed.
    sleep(6)
    return result
def run_solr(dir, action, zk_url = None, port = None, logdir = None, wait=True):
solrScript = "solr"
......
......@@ -134,6 +134,12 @@ def main():
mc.create_solr_collection(mc.solrBinDir(atlas_home), mc.solrConfDir(atlas_home), "edge_index", logdir)
mc.create_solr_collection(mc.solrBinDir(atlas_home), mc.solrConfDir(atlas_home), "fulltext_index", logdir)
#elasticsearch setup
if mc.is_elasticsearch_local():
print "configured for local elasticsearch."
mc.start_elasticsearch(mc.elasticsearchBinDir(atlas_home), logdir)
print "elasticsearch started."
web_app_path = os.path.join(web_app_dir, "atlas")
if (mc.isCygwin()):
web_app_path = mc.convertCygwinPath(web_app_path)
......
......@@ -71,8 +71,38 @@ def main():
mc.run_solr(mc.solrBinDir(atlas_home), "stop", None, mc.solrPort(), None, True)
if mc.is_cassandra_local(confdir):
mc.run_zookeeper(mc.zookeeperBinDir(atlas_home), "stop")
if mc.is_cassandra_local(confdir):
mc.run_zookeeper(mc.zookeeperBinDir(atlas_home), "stop")
# stop elasticsearch
if mc.is_elasticsearch_local():
logdir = os.path.join(atlas_home, 'logs')
elastic_pid_file = os.path.join(logdir, 'elasticsearch.pid')
try:
pf = file(elastic_pid_file, 'r')
pid = int(pf.read().strip())
pf.close()
except:
pid = None
if not pid:
sys.stderr.write("No process ID file found. Elasticsearch not running?\n")
return
if not mc.exist_pid(pid):
sys.stderr.write("Elasticsearch no longer running with pid %s\nImproper shutdown?\npid file deleted.\n" %pid)
os.remove(elastic_pid_file)
return
os.kill(pid, SIGTERM)
mc.wait_for_shutdown(pid, "stopping elasticsearch", 30)
if not mc.exist_pid(pid):
print "Elasticsearch stopped!!!\n"
# assuming kill worked since process check on windows is more involved...
if os.path.exists(elastic_pid_file):
os.remove(elastic_pid_file)
# stop hbase
if mc.is_hbase_local(confdir):
......
......@@ -62,4 +62,7 @@ export MANAGE_LOCAL_HBASE=${hbase.embedded}
export MANAGE_LOCAL_SOLR=${solr.embedded}
# indicates whether or not cassandra is the embedded backend for Atlas
export MANAGE_EMBEDDED_CASSANDRA=${cassandra.embedded}
\ No newline at end of file
export MANAGE_EMBEDDED_CASSANDRA=${cassandra.embedded}
# indicates whether or not a local instance of Elasticsearch should be started for Atlas
export MANAGE_LOCAL_ELASTICSEARCH=${elasticsearch.managed}
......@@ -87,6 +87,13 @@
</fileSet>
<fileSet>
<directory>target/elasticsearch</directory>
<outputDirectory>elasticsearch</outputDirectory>
<fileMode>0755</fileMode>
<directoryMode>0755</directoryMode>
</fileSet>
<fileSet>
<directory>../logs</directory>
<outputDirectory>logs</outputDirectory>
<directoryMode>0777</directoryMode>
......
......@@ -20,7 +20,13 @@ If any further JanusGraph configuration needs to be setup, please prefix the pro
In addition to setting up configurations, please ensure that environment variable HBASE_CONF_DIR is setup to point to
the directory containing HBase configuration file hbase-site.xml.
---+++ Graph Search Index - Solr
---+++ Graph Index Search Engine
An index search engine is required for Atlas. This search engine runs separately from the Atlas server and from the
storage backend. Only two search engines are currently supported: Solr and Elasticsearch. Pick the search engine
best suited for your environment and follow the configuration instructions below.
---++++ Graph Search Index - Solr
Solr installation in Cloud mode is a prerequisite for Apache Atlas use. Set the following properties to configure JanusGraph to use Solr as the index search engine.
<verbatim>
......@@ -38,6 +44,14 @@ atlas.graph.index.search.solr.zookeeper-connect-timeout=60000
atlas.graph.index.search.solr.zookeeper-session-timeout=60000
</verbatim>
---++++ Graph Search Index - Elasticsearch (Tech Preview)
Elasticsearch is a prerequisite for Apache Atlas use. Set the following properties to configure JanusGraph to use Elasticsearch as the index search engine.
<verbatim>
atlas.graph.index.search.backend=elasticsearch
atlas.graph.index.search.hostname=<hostname(s) of the Elasticsearch master nodes comma separated>
atlas.graph.index.search.elasticsearch.client-only=true</verbatim>
---++ Search Configs
Search APIs (DSL, basic search, full-text search) support pagination and have optional limit and offset arguments. Following configs are related to search pagination
......
......@@ -170,8 +170,10 @@ HA guarantees HBase provides. In order to configure Atlas to use HBase in HA mod
---++ Index Store
As described above, Atlas indexes metadata through JanusGraph to support full text search queries. In order to provide HA
for the index store, we recommend that Atlas be configured to use Solr as the backing index store for JanusGraph. In order
to configure Atlas to use Solr in HA mode, do the following:
for the index store, we recommend that Atlas be configured to use Solr or Elasticsearch as the backing index store for JanusGraph.
---+++ Solr
In order to configure Atlas to use Solr in HA mode, do the following:
* Choose an existing !SolrCloud cluster setup in HA mode to configure in Atlas (OR) Set up a new [[https://cwiki.apache.org/confluence/display/solr/SolrCloud][SolrCloud cluster]].
* Ensure Solr is brought up on at least 2 physical hosts for redundancy, and each host runs a Solr node.
......@@ -179,6 +181,14 @@ to configure Atlas to use Solr in HA mode, do the following:
* Create the !SolrCloud collections required by Atlas, as described in [[InstallationSteps][Installation Steps]]
* Refer to the [[Configuration][Configuration page]] for the options to configure in atlas.properties to setup Atlas with Solr.
---+++ Elasticsearch (Tech Preview)
In order to configure Atlas to use Elasticsearch in HA mode, do the following:
* Choose an existing Elasticsearch cluster setup, (OR) setup a new cluster [[https://www.elastic.co/guide/en/elasticsearch/reference/5.6/setup.html][Elasticsearch cluster]].
* Ensure that Elasticsearch is brought up on at least five physical hosts for redundancy.
* A replica count of 3 is recommended
* Refer to the [[Configuration][Configuration page]] for the options to configure in atlas.properties to setup Atlas with Elasticsearch.
---++ Notification Server
Metadata notification events from Hooks are sent to Atlas by writing them to a Kafka topic called *ATLAS_HOOK*. Similarly, events from
......
......@@ -185,6 +185,24 @@ Pre-requisites for running Solr in cloud mode
* !SolrCloud has support for replication and sharding. It is highly recommended to use !SolrCloud with at least two Solr nodes running on different servers with replication enabled.
If using !SolrCloud, then you also need !ZooKeeper installed and configured with 3 or 5 !ZooKeeper nodes
*Configuring Elasticsearch as the Indexing Backend for the Graph Repository (Tech Preview)*
By default, Atlas uses JanusGraph as the graph repository and is the only graph repository implementation available currently. For configuring JanusGraph to work with Elasticsearch, please follow the instructions below
* Install an Elasticsearch cluster. The version currently supported is 5.6.4, and can be acquired from: https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.6.4.tar.gz
* For simple testing a single Elasticsearch node can be started by using the 'elasticsearch' command in the bin directory of the Elasticsearch distribution.
* Change ATLAS configuration to point to the Elasticsearch instance setup. Please make sure the following configurations are set to the below values in ATLAS_HOME/conf/atlas-application.properties
<verbatim>
atlas.graph.index.search.backend=elasticsearch
atlas.graph.index.search.hostname=<the hostname(s) of the Elasticsearch master nodes comma separated>
atlas.graph.index.search.elasticsearch.client-only=true
</verbatim>
* Restart Atlas
For more information on JanusGraph Elasticsearch configuration, please refer to http://docs.janusgraph.org/0.2.0/elasticsearch.html
*Configuring Kafka Topics*
Atlas uses Kafka to ingest metadata from other components at runtime. This is described in the [[Architecture][Architecture page]]
......
......@@ -119,6 +119,12 @@
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>${elasticsearch.version}</version>
</dependency>
<dependency>
<groupId>org.janusgraph</groupId>
<artifactId>janusgraph-es</artifactId>
<version>${janus.version}</version>
......
......@@ -543,6 +543,7 @@
<hbase.version>1.1.2</hbase.version>
<solr.version>5.5.1</solr.version>
<kafka.version>1.0.0</kafka.version>
<elasticsearch.version>5.6.4</elasticsearch.version>
<kafka.scala.binary.version>2.11</kafka.scala.binary.version>
<curator.version>2.11.0</curator.version>
<zookeeper.version>3.4.6</zookeeper.version>
......@@ -672,7 +673,7 @@
<graphdb.backend.impl>org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase</graphdb.backend.impl>
<graph.index.backend>solr</graph.index.backend>
<tests.solr.embedded>true</tests.solr.embedded>
<distro.exclude.packages>WEB-INF/lib/je-*.jar,WEB-INF/lib/elasticsearch-*.jar,WEB-INF/lib/solr-test-framework-*.jar, WEB-INF/lib/jts-*.jar,WEB-INF/lib/logback-*.jar</distro.exclude.packages>
<distro.exclude.packages>WEB-INF/lib/je-*.jar,WEB-INF/lib/solr-test-framework-*.jar, WEB-INF/lib/jts-*.jar,WEB-INF/lib/logback-*.jar</distro.exclude.packages>
</properties>
</profile>
......@@ -691,7 +692,7 @@
<graphdb.backend.impl>org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase</graphdb.backend.impl>
<graph.index.backend>solr</graph.index.backend>
<tests.solr.embedded>true</tests.solr.embedded>
<distro.exclude.packages>WEB-INF/lib/je-*.jar,WEB-INF/lib/elasticsearch-*.jar,WEB-INF/lib/solr-test-framework-*.jar, WEB-INF/lib/jts-*.jar,WEB-INF/lib/logback-*.jar</distro.exclude.packages>
<distro.exclude.packages>WEB-INF/lib/je-*.jar,WEB-INF/lib/solr-test-framework-*.jar, WEB-INF/lib/jts-*.jar,WEB-INF/lib/logback-*.jar</distro.exclude.packages>
</properties>
</profile>
......
......@@ -395,6 +395,18 @@
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>${elasticsearch.version}</version>
<exclusions>
<exclusion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-graphdb-common</artifactId>
<classifier>tests</classifier>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment