Commit 72641fb7 by Madhan Neethiraj

ATLAS-1631: parameterized Gremlin queries for better performance

parent b9779eca
......@@ -19,6 +19,7 @@ package org.apache.atlas.repository.graphdb;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Map;
import java.util.Set;
import javax.script.Bindings;
......@@ -256,6 +257,20 @@ public interface AtlasGraph<V, E> {
GroovyExpression addOutputTransformationPredicate(GroovyExpression expr, boolean isSelect, boolean isPath);
/**
* Get an instance of the script engine to execute Gremlin queries
*
* @return script engine to execute Gremlin queries
*/
ScriptEngine getGremlinScriptEngine();
/**
* Release an instance of the script engine obtained with getGremlinScriptEngine()
*
* @param scriptEngine: ScriptEngine to release
*/
void releaseGremlinScriptEngine(ScriptEngine scriptEngine);
/**
* Executes a Gremlin script, returns an object with the result.
*
* @param gremlinQuery
......@@ -280,7 +295,7 @@ public interface AtlasGraph<V, E> {
*
* @throws ScriptException
*/
Object executeGremlinScript(ScriptEngine scriptEngine, Bindings bindings, String query, boolean isPath) throws ScriptException;
Object executeGremlinScript(ScriptEngine scriptEngine, Map<? extends String, ? extends Object> bindings, String query, boolean isPath) throws ScriptException;
/**
......
......@@ -63,12 +63,15 @@ import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter;
import com.tinkerpop.pipes.util.structures.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Titan 0.5.4 implementation of AtlasGraph.
*/
public class Titan0Graph implements AtlasGraph<Titan0Vertex, Titan0Edge> {
private static final Logger LOG = LoggerFactory.getLogger(Titan0Graph.class);
private final Set<String> multiProperties;
......@@ -282,25 +285,54 @@ public class Titan0Graph implements AtlasGraph<Titan0Vertex, Titan0Edge> {
}
@Override
public Object executeGremlinScript(ScriptEngine scriptEngine, Bindings bindings, String query, boolean isPath) throws ScriptException {
if(!bindings.containsKey("g")) {
bindings.put("g", getGraph());
public ScriptEngine getGremlinScriptEngine() {
ScriptEngineManager manager = new ScriptEngineManager();
ScriptEngine engine = manager.getEngineByName("gremlin-groovy");
//Do not cache script compilations due to memory implications
engine.getContext().setAttribute("#jsr223.groovy.engine.keep.globals", "phantom", ScriptContext.ENGINE_SCOPE);
return engine;
}
@Override
public void releaseGremlinScriptEngine(ScriptEngine scriptEngine) {
// no action needed
}
@Override
public Object executeGremlinScript(ScriptEngine scriptEngine, Map<? extends String, ? extends Object> userBindings, String query, boolean isPath) throws ScriptException {
if (LOG.isDebugEnabled()) {
LOG.debug("executeGremlinScript(query={}, userBindings={})", query, userBindings);
}
Bindings bindings = scriptEngine.createBindings();
if (userBindings != null) {
bindings.putAll(userBindings);
}
bindings.put("g", getGraph());
Object result = scriptEngine.eval(query, bindings);
return convertGremlinScriptResult(isPath, result);
return convertGremlinScriptResult(isPath, result);
}
private Object executeGremlinScript(String gremlinQuery) throws ScriptException {
Object result = null;
ScriptEngine engine = getGremlinScriptEngine();
ScriptEngineManager manager = new ScriptEngineManager();
ScriptEngine engine = manager.getEngineByName("gremlin-groovy");
try {
Bindings bindings = engine.createBindings();
bindings.put("g", getGraph());
//Do not cache script compilations due to memory implications
engine.getContext().setAttribute("#jsr223.groovy.engine.keep.globals", "phantom", ScriptContext.ENGINE_SCOPE);
Object result = engine.eval(gremlinQuery, bindings);
result = engine.eval(gremlinQuery, bindings);
} finally {
releaseGremlinScriptEngine(engine);
}
return result;
}
......
......@@ -290,14 +290,7 @@ public class Titan1Graph implements AtlasGraph<Titan1Vertex, Titan1Edge> {
}
@Override
public Object executeGremlinScript(String query, boolean isPath) throws ScriptException {
Object result = executeGremlinScript(query);
return convertGremlinValue(result);
}
private Object executeGremlinScript(String gremlinQuery) throws ScriptException {
public GremlinGroovyScriptEngine getGremlinScriptEngine() {
Set<String> extraImports = new HashSet<String>();
extraImports.add(java.util.function.Function.class.getName());
......@@ -307,29 +300,54 @@ public class Titan1Graph implements AtlasGraph<Titan1Vertex, Titan1Edge> {
CompilerCustomizerProvider provider = new DefaultImportCustomizerProvider(extraImports, extraStaticImports);
GremlinGroovyScriptEngine scriptEngine = new GremlinGroovyScriptEngine(provider);
return scriptEngine;
}
@Override
public void releaseGremlinScriptEngine(ScriptEngine scriptEngine) {
if (scriptEngine instanceof GremlinGroovyScriptEngine) {
try {
((GremlinGroovyScriptEngine)scriptEngine).close();
} catch (Exception e) {
// ignore
}
}
}
@Override
public Object executeGremlinScript(String query, boolean isPath) throws ScriptException {
Object result = executeGremlinScript(query);
return convertGremlinValue(result);
}
private Object executeGremlinScript(String gremlinQuery) throws ScriptException {
GremlinGroovyScriptEngine scriptEngine = getGremlinScriptEngine();
try {
Bindings bindings = scriptEngine.createBindings();
bindings.put("graph", getGraph());
bindings.put("g", getGraph().traversal());
Object result = scriptEngine.eval(gremlinQuery, bindings);
return result;
} finally {
try {
scriptEngine.close();
} catch (Exception e) {
throw new ScriptException(e);
}
releaseGremlinScriptEngine(scriptEngine);
}
}
@Override
public Object executeGremlinScript(ScriptEngine scriptEngine,
Bindings bindings, String query, boolean isPath)
Map<? extends String, ? extends Object> userBindings, String query, boolean isPath)
throws ScriptException {
Bindings bindings = scriptEngine.createBindings();
if(!bindings.containsKey("g")) {
bindings.putAll(userBindings);
bindings.put("g", getGraph());
}
Object result = scriptEngine.eval(query, bindings);
return convertGremlinValue(result);
}
......
......@@ -57,8 +57,10 @@ import scala.util.Either;
import scala.util.parsing.combinator.Parsers.NoSuccess;
import javax.inject.Inject;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
......@@ -164,6 +166,7 @@ public class EntityDiscoveryService implements AtlasDiscoveryService {
LOG.debug("Executing basic search query: {} with type: {} and classification: {}", query, typeName, classification);
}
Map<String, Object> bindings = new HashMap<>();
QueryParams params = validateSearchParams(limit, offset);
String basicQuery = "g.V()";
......@@ -174,10 +177,9 @@ public class EntityDiscoveryService implements AtlasDiscoveryService {
throw new AtlasBaseException(UNKNOWN_TYPENAME, typeName);
}
String typeFilterExpr = gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_TYPE_FILTER);
bindings.put("typeNames", entityType.getTypeAndAllSubTypes());
basicQuery += String.format(typeFilterExpr,
StringUtils.join(entityType.getTypeAndAllSubTypes(), "','"));
basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_TYPE_FILTER);
ret.setType(typeName);
}
......@@ -189,24 +191,30 @@ public class EntityDiscoveryService implements AtlasDiscoveryService {
throw new AtlasBaseException(CLASSIFICATION_NOT_FOUND, classification);
}
String classificationFilterExpr = gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_CLASSIFICATION_FILTER);
bindings.put("traitNames", classificationType.getTypeAndAllSubTypes());
basicQuery += String.format(classificationFilterExpr,
StringUtils.join(classificationType.getTypeAndAllSubTypes(), "','"));
basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_CLASSIFICATION_FILTER);
ret.setClassification(classification);
}
if (StringUtils.isNotEmpty(query)) {
basicQuery += String.format(gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_QUERY_FILTER), query);
bindings.put("queryStr", query);
basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.BASIC_SEARCH_QUERY_FILTER);
ret.setQueryText(query);
}
basicQuery += String.format(gremlinQueryProvider.getQuery(AtlasGremlinQuery.TO_RANGE_LIST), params.offset(), params.limit());
bindings.put("offset", params.offset());
bindings.put("limit", params.limit());
basicQuery += gremlinQueryProvider.getQuery(AtlasGremlinQuery.TO_RANGE_LIST);
ScriptEngine scriptEngine = graph.getGremlinScriptEngine();
try {
Object result = graph.executeGremlinScript(basicQuery, false);
Object result = graph.executeGremlinScript(scriptEngine, bindings, basicQuery, false);
if (result instanceof List && CollectionUtils.isNotEmpty((List) result)) {
List queryResult = (List) result;
......@@ -225,6 +233,8 @@ public class EntityDiscoveryService implements AtlasDiscoveryService {
}
} catch (ScriptException e) {
throw new AtlasBaseException(DISCOVERY_QUERY_FAILED, basicQuery);
} finally {
graph.releaseGremlinScriptEngine(scriptEngine);
}
return ret;
......
......@@ -30,6 +30,7 @@ import org.apache.atlas.model.instance.AtlasStruct;
import org.apache.atlas.repository.store.graph.v1.EntityGraphRetriever;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.configuration.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -122,6 +123,10 @@ public class FullTextMapperV2 {
}
private void mapAttributes(Map<String, Object> attributes, AtlasEntityExtInfo entityExtInfo, StringBuilder sb, Set<String> processedGuids) throws AtlasBaseException {
if (MapUtils.isEmpty(attributes)) {
return;
}
for (Map.Entry<String, Object> attributeEntry : attributes.entrySet()) {
String attribKey = attributeEntry.getKey();
Object attrValue = attributeEntry.getValue();
......
......@@ -44,15 +44,15 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider {
case EXPORT_BY_GUID_CONNECTED_OUT_EDGE:
return "g.V('__guid', startGuid).outE().inV().has('__guid').__guid.dedup().toList()";
case EXPORT_TYPE_STARTS_WITH:
return "g.V().has('__typeName','%s').filter({it.'%s'.startsWith(attrValue)}).has('__guid').__guid.toList()";
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).startsWith(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_ENDS_WITH:
return "g.V().has('__typeName','%s').filter({it.'%s'.endsWith(attrValue)}).has('__guid').__guid.toList()";
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).endsWith(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_CONTAINS:
return "g.V().has('__typeName','%s').filter({it.'%s'.contains(attrValue)}).has('__guid').__guid.toList()";
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).contains(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_MATCHES:
return "g.V().has('__typeName','%s').filter({it.'%s'.matches(attrValue)}).has('__guid').__guid.toList()";
return "g.V().has('__typeName',typeName).filter({it.getProperty(attrName).matches(attrValue)}).has('__guid').__guid.toList()";
case EXPORT_TYPE_DEFAULT:
return "g.V().has('__typeName','%s').has('%s', attrValue).has('__guid').__guid.toList()";
return "g.V().has('__typeName',typeName).has(attrName, attrValue).has('__guid').__guid.toList()";
case FULL_LINEAGE:
return "g.V('__guid', '%s').as('src').in('%s').out('%s')." +
"loop('src', {((it.path.contains(it.object)) ? false : true)}, " +
......@@ -66,13 +66,13 @@ public class AtlasGremlin2QueryProvider extends AtlasGremlinQueryProvider {
"path().toList()";
case BASIC_SEARCH_QUERY_FILTER:
return ".has('entityText', com.thinkaurelius.titan.core.attribute.Text.CONTAINS, '%s')";
return ".has('entityText', com.thinkaurelius.titan.core.attribute.Text.CONTAINS, queryStr)";
case BASIC_SEARCH_TYPE_FILTER:
return ".has('__typeName', T.in, ['%s'])";
return ".has('__typeName', T.in, typeNames)";
case BASIC_SEARCH_CLASSIFICATION_FILTER:
return ".has('__traitNames', T.in, ['%s'])";
return ".has('__traitNames', T.in, traitNames)";
case TO_RANGE_LIST:
return " [%s..<%s].toList()";
return " [offset..<limit].toList()";
}
// Should never reach this point
return null;
......
......@@ -17,7 +17,6 @@
*/
package org.apache.atlas.web.resources;
import com.tinkerpop.gremlin.groovy.jsr223.GremlinGroovyScriptEngine;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.AtlasException;
import org.apache.atlas.AtlasServiceException;
......@@ -46,8 +45,7 @@ import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.script.Bindings;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptException;
import java.util.ArrayList;
import java.util.Collections;
......@@ -106,6 +104,8 @@ public class ExportService {
} catch(Exception ex) {
LOG.error("Operation failed: ", ex);
} finally {
atlasGraph.releaseGremlinScriptEngine(context.scriptEngine);
LOG.info("<== export(user={}, from={}): status {}", userName, requestingIP, context.result.getOperationStatus());
}
......@@ -182,8 +182,12 @@ public class ExportService {
continue;
}
String query = String.format(queryTemplate, typeName, attribute.getQualifiedName());
List<String> guids = executeGremlinQuery(query, "attrValue", attrValue.toString(), context);
context.bindings.clear();
context.bindings.put("typeName", typeName);
context.bindings.put("attrName", attribute.getQualifiedName());
context.bindings.put("attrValue", attrValue);
List<String> guids = executeGremlinQuery(queryTemplate, context);
if (CollectionUtils.isNotEmpty(guids)) {
for (String guid : guids) {
......@@ -263,7 +267,6 @@ public class ExportService {
return;
}
try {
for (TraversalDirection direction : directions) {
String query = getQueryForTraversalDirection(direction);
......@@ -271,7 +274,10 @@ public class ExportService {
LOG.debug("==> getConnectedEntityGuids({}): guidsToProcess {} query {}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size(), query);
}
List<String> guids = executeGremlinQuery(query, entity.getGuid(), context);
context.bindings.clear();
context.bindings.put("startGuid", entity.getGuid());
List<String> guids = executeGremlinQuery(query, context);
if (CollectionUtils.isEmpty(guids)) {
continue;
......@@ -302,10 +308,6 @@ public class ExportService {
LOG.debug("<== getConnectedEntityGuids({}): found {} guids; guidsToProcess {}", entity.getGuid(), guids.size(), context.guidsToProcess.size());
}
}
} catch (ScriptException e) {
LOG.error("Child entities could not be added for %s", entity.getGuid());
}
}
private String getQueryForTraversalDirection(TraversalDirection direction) {
......@@ -320,15 +322,16 @@ public class ExportService {
}
private void getEntityGuidsForFullFetch(AtlasEntity entity, ExportContext context) {
try {
String query = this.gremlinQueryProvider.getQuery(AtlasGremlinQuery.EXPORT_BY_GUID_FULL);
if (LOG.isDebugEnabled()) {
LOG.debug("==> getEntityGuidsForFullFetch({}): guidsToProcess {}", AtlasTypeUtil.getAtlasObjectId(entity), context.guidsToProcess.size());
}
List<String> result = executeGremlinQuery(query, entity.getGuid(), context);
String query = this.gremlinQueryProvider.getQuery(AtlasGremlinQuery.EXPORT_BY_GUID_FULL);
context.bindings.clear();
context.bindings.put("startGuid", entity.getGuid());
List<String> result = executeGremlinQuery(query, context);
if (result == null) {
return;
......@@ -347,9 +350,6 @@ public class ExportService {
if (LOG.isDebugEnabled()) {
LOG.debug("<== getEntityGuidsForFullFetch({}): found {} guids; guidsToProcess {}", entity.getGuid(), result.size(), context.guidsToProcess.size());
}
} catch (ScriptException e) {
LOG.error("Child entities could not be added for %s", entity.getGuid());
}
}
private void addEntity(AtlasEntity entity, ExportContext context) throws AtlasBaseException {
......@@ -393,21 +393,9 @@ public class ExportService {
}
}
private List<String> executeGremlinQuery(String query, String guid, ExportContext context) throws ScriptException {
context.bindings.put("startGuid", guid);
return (List<String>) atlasGraph.executeGremlinScript(context.scriptEngine,
context.bindings,
query,
false);
}
private List<String> executeGremlinQuery(String query, String parameterName, String parameterValue, ExportContext context) {
context.bindings.put(parameterName, parameterValue);
private List<String> executeGremlinQuery(String query, ExportContext context) {
try {
return (List<String>) atlasGraph.executeGremlinScript(context.scriptEngine,
context.bindings,
query,
false);
return (List<String>) atlasGraph.executeGremlinScript(context.scriptEngine, context.bindings, query, false);
} catch (ScriptException e) {
LOG.error("Script execution failed for query: ", query, e);
return null;
......@@ -451,8 +439,8 @@ public class ExportService {
final AtlasExportResult result;
final ZipSink sink;
private final GremlinGroovyScriptEngine scriptEngine;
private final Bindings bindings;
private final ScriptEngine scriptEngine;
private final Map<String, Object> bindings;
private final ExportFetchType fetchType;
private final String matchType;
......@@ -460,14 +448,8 @@ public class ExportService {
this.result = result;
this.sink = sink;
this.scriptEngine = new GremlinGroovyScriptEngine();
//Do not cache script compilations due to memory implications
scriptEngine.getContext().setAttribute("#jsr223.groovy.engine.keep.globals",
"phantom",
ScriptContext.ENGINE_SCOPE);
bindings = scriptEngine.getBindings(ScriptContext.ENGINE_SCOPE);
scriptEngine = atlasGraph.getGremlinScriptEngine();
bindings = new HashMap<>();
fetchType = getFetchType(result.getRequest());
matchType = getMatchType(result.getRequest());
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment