Commit 8792f162 by Bolke de Bruin, committed by Ashutosh Mestry

Use fulltext indices for dsl search

Per the JanusGraph documentation (https://docs.janusgraph.org/latest/index-parameters.html), strings are indexed as text by default. Atlas uses string search, which is suboptimal and leads to significant performance loss. This switches to using fulltext predicates when available, which gives a significant speedup. Signed-off-by: Ashutosh Mestry <amestry@hortonworks.com>
parent e6b9e9ad
...@@ -43,9 +43,10 @@ enum GremlinClause { ...@@ -43,9 +43,10 @@ enum GremlinClause {
RANGE("range(%s, %s + %s)"), RANGE("range(%s, %s + %s)"),
SELECT("select('%s')"), SELECT("select('%s')"),
TO_LIST("toList()"), TO_LIST("toList()"),
TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"), STRING_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"),
TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textPrefix(%s))"), TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(%s))"),
TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textRegex(\".*\" + %s))"), TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsPrefix(%s))"),
TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(\".*\" + %s))"),
TRAIT("outE('classifiedAs').has('__name', within('%s')).outV()"), TRAIT("outE('classifiedAs').has('__name', within('%s')).outV()"),
ANY_TRAIT("or(has('__traitNames'), has('__propagatedTraitNames'))"), ANY_TRAIT("or(has('__traitNames'), has('__propagatedTraitNames'))"),
NO_TRAIT("and(hasNot('__traitNames'), hasNot('__propagatedTraitNames'))"), NO_TRAIT("and(hasNot('__traitNames'), hasNot('__propagatedTraitNames'))"),
......
...@@ -171,6 +171,7 @@ public class GremlinQueryComposer { ...@@ -171,6 +171,7 @@ public class GremlinQueryComposer {
} }
String currentType = context.getActiveTypeName(); String currentType = context.getActiveTypeName();
IdentifierHelper.Info org = null; IdentifierHelper.Info org = null;
IdentifierHelper.Info lhsI = createInfo(lhs); IdentifierHelper.Info lhsI = createInfo(lhs);
if (!lhsI.isPrimitive()) { if (!lhsI.isPrimitive()) {
...@@ -193,7 +194,14 @@ public class GremlinQueryComposer { ...@@ -193,7 +194,14 @@ public class GremlinQueryComposer {
rhs = addQuotesIfNecessary(lhsI, rhs); rhs = addQuotesIfNecessary(lhsI, rhs);
SearchParameters.Operator op = SearchParameters.Operator.fromString(operator); SearchParameters.Operator op = SearchParameters.Operator.fromString(operator);
if (op == SearchParameters.Operator.LIKE) { if (op == SearchParameters.Operator.LIKE) {
add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs)); final AtlasStructType.AtlasAttribute attribute = context.getActiveEntityType().getAttribute(lhsI.getAttributeName());
final AtlasStructDef.AtlasAttributeDef.IndexType indexType = attribute.getAttributeDef().getIndexType();
if (indexType == AtlasStructDef.AtlasAttributeDef.IndexType.STRING) {
add(GremlinClause.STRING_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
} else {
add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
}
} else if (op == SearchParameters.Operator.IN) { } else if (op == SearchParameters.Operator.IN) {
add(GremlinClause.HAS_OPERATOR, getPropertyForClause(lhsI), "within", rhs); add(GremlinClause.HAS_OPERATOR, getPropertyForClause(lhsI), "within", rhs);
} else { } else {
......
...@@ -17,17 +17,23 @@ ...@@ -17,17 +17,23 @@
*/ */
package org.apache.atlas.query; package org.apache.atlas.query;
import afu.org.checkerframework.checker.igj.qual.I;
import jnr.ffi.annotations.In;
import org.apache.atlas.AtlasErrorCode; import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.exception.AtlasBaseException; import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.TypeCategory; import org.apache.atlas.model.TypeCategory;
import org.apache.atlas.model.typedef.AtlasStructDef;
import org.apache.atlas.query.antlr4.AtlasDSLParser; import org.apache.atlas.query.antlr4.AtlasDSLParser;
import org.apache.atlas.type.AtlasEntityType; import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasStructType;
import org.apache.atlas.type.AtlasType; import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeRegistry; import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
...@@ -163,6 +169,8 @@ public class GremlinQueryComposerTest { ...@@ -163,6 +169,8 @@ public class GremlinQueryComposerTest {
verify("from DB where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain)); verify("from DB where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain));
verify("Table where Asset.name like \"Tab*\"", verify("Table where Asset.name like \"Tab*\"",
"g.V().has('__typeName', 'Table').has('Asset.__s_name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).dedup().limit(25).toList()"); "g.V().has('__typeName', 'Table').has('Asset.__s_name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).dedup().limit(25).toList()");
verify("Table where owner like \"Tab*\"",
"g.V().has('__typeName', 'Table').has('Table.owner', org.janusgraph.core.attribute.Text.textContainsRegex(\"Tab.*\")).dedup().limit(25).toList()");
verify("from Table where (db.name = \"Reporting\")", verify("from Table where (db.name = \"Reporting\")",
"g.V().has('__typeName', 'Table').out('__Table.db').has('DB.name', eq(\"Reporting\")).dedup().in('__Table.db').dedup().limit(25).toList()"); "g.V().has('__typeName', 'Table').out('__Table.db').has('DB.name', eq(\"Reporting\")).dedup().in('__Table.db').dedup().limit(25).toList()");
} }
...@@ -409,6 +417,21 @@ public class GremlinQueryComposerTest { ...@@ -409,6 +417,21 @@ public class GremlinQueryComposerTest {
} else { } else {
type = mock(AtlasEntityType.class); type = mock(AtlasEntityType.class);
when(type.getTypeCategory()).thenReturn(TypeCategory.ENTITY); when(type.getTypeCategory()).thenReturn(TypeCategory.ENTITY);
AtlasStructType.AtlasAttribute attr = mock(AtlasStructType.AtlasAttribute.class);
AtlasStructDef.AtlasAttributeDef def = mock(AtlasStructDef.AtlasAttributeDef.class);
when(def.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.DEFAULT);
when(attr.getAttributeDef()).thenReturn(def);
AtlasStructType.AtlasAttribute attr_s = mock(AtlasStructType.AtlasAttribute.class);
AtlasStructDef.AtlasAttributeDef def_s = mock(AtlasStructDef.AtlasAttributeDef.class);
when(def_s.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.STRING);
when(attr_s.getAttributeDef()).thenReturn(def_s);
when(((AtlasEntityType) type).getAttribute(anyString())).thenReturn(attr);
when(((AtlasEntityType) type).getAttribute(eq("name"))).thenReturn(attr_s);
} }
if(typeName.equals("PIII")) { if(typeName.equals("PIII")) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment