Commit d1c585a2 by apoorvnaik Committed by Madhan Neethiraj

ATLAS-2091: basic search update to avoid index query for attribute values…

ATLAS-2091: basic search update to avoid index query for attribute values containing Tokenizer characters

Signed-off-by: Madhan Neethiraj <madhan@apache.org>
parent a785e935
...@@ -168,7 +168,7 @@ public abstract class SearchProcessor { ...@@ -168,7 +168,7 @@ public abstract class SearchProcessor {
if (isIndexSearchable(filterCriteria, structType)) { if (isIndexSearchable(filterCriteria, structType)) {
indexFiltered.add(attributeName); indexFiltered.add(attributeName);
} else { } else {
LOG.warn("not using index-search for attribute '{}' - its either non-indexed or a string attribute used with NEQ operator; might cause poor performance", structType.getQualifiedAttributeName(attributeName)); LOG.warn("not using index-search for attribute '{}'; might cause poor performance", structType.getQualifiedAttributeName(attributeName));
graphFiltered.add(attributeName); graphFiltered.add(attributeName);
} }
...@@ -330,16 +330,34 @@ public abstract class SearchProcessor { ...@@ -330,16 +330,34 @@ public abstract class SearchProcessor {
boolean ret = indexedKeys != null && indexedKeys.contains(qualifiedName); boolean ret = indexedKeys != null && indexedKeys.contains(qualifiedName);
if (ret) { // index exists if (ret) { // index exists
// Don't use index query for NEQ on string type attributes - as it might return fewer entries due to tokenization of vertex property value by indexer // for string type attributes, don't use index query in the following cases:
if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) { // - operation is NEQ, as it might return fewer entries due to tokenization of vertex property value
AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName()); // - value-to-compare has special characters
AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName());
if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName())) {
if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) {
if (LOG.isDebugEnabled()) {
LOG.debug("NEQ operator found for string attribute {}, deferring to in-memory or graph query (might cause poor performance)", qualifiedName);
}
ret = false;
} else if (hasIndexQuerySpecialChar(filterCriteria.getAttributeValue())) {
if (LOG.isDebugEnabled()) {
LOG.debug("special characters found in filter value {}, deferring to in-memory or graph query (might cause poor performance)", filterCriteria.getAttributeValue());
}
if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName())) {
ret = false; ret = false;
} }
} }
} }
if (LOG.isDebugEnabled()) {
if (!ret) {
LOG.debug("Not using index query for: attribute='{}', operator='{}', value='{}'", qualifiedName, filterCriteria.getOperator(), filterCriteria.getAttributeValue());
}
}
return ret; return ret;
} }
...@@ -358,7 +376,6 @@ public abstract class SearchProcessor { ...@@ -358,7 +376,6 @@ public abstract class SearchProcessor {
if (nestedExpression.length() > 0) { if (nestedExpression.length() > 0) {
nestedExpression.append(SPACE_STRING).append(criteria.getCondition()).append(SPACE_STRING); nestedExpression.append(SPACE_STRING).append(criteria.getCondition()).append(SPACE_STRING);
} }
// todo: when a neq operation is nested and occurs in the beginning of the query, index query has issues
nestedExpression.append(nestedQuery); nestedExpression.append(nestedQuery);
} }
} }
...@@ -539,8 +556,7 @@ public abstract class SearchProcessor { ...@@ -539,8 +556,7 @@ public abstract class SearchProcessor {
query.has(qualifiedName, AtlasGraphQuery.ComparisionOperator.NOT_EQUAL, attrValue); query.has(qualifiedName, AtlasGraphQuery.ComparisionOperator.NOT_EQUAL, attrValue);
break; break;
case LIKE: case LIKE:
// TODO: Maybe we need to validate pattern query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, attrValue);
query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, getLikeRegex(attrValue));
break; break;
case CONTAINS: case CONTAINS:
query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, getContainsRegex(attrValue)); query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, getContainsRegex(attrValue));
...@@ -616,41 +632,88 @@ public abstract class SearchProcessor { ...@@ -616,41 +632,88 @@ public abstract class SearchProcessor {
} }
} }
// ATLAS-2118: Reserved regex characters in attribute value can cause the graph query to fail when parsing the contains regex private static String getContainsRegex(String attributeValue) {
private String getContainsRegex(String attributeValue) { return ".*" + escapeRegExChars(attributeValue) + ".*";
StringBuilder escapedAttrVal = new StringBuilder(".*"); }
for (int i = 0; i < attributeValue.length(); i++) { private static String getSuffixRegex(String attributeValue) {
final char c = attributeValue.charAt(i); return ".*" + escapeRegExChars(attributeValue);
}
switch (c) {
case '+': private static String escapeRegExChars(String val) {
case '|': StringBuilder escapedVal = new StringBuilder();
case '(':
case '{': for (int i = 0; i < val.length(); i++) {
case '[': final char c = val.charAt(i);
case '*':
case '?': if (isRegExSpecialChar(c)) {
case '$': escapedVal.append('\\');
case '/':
case '^':
escapedAttrVal.append('\\');
break;
} }
escapedAttrVal.append(c); escapedVal.append(c);
} }
escapedAttrVal.append(".*"); return escapedVal.toString();
}
private static boolean isRegExSpecialChar(char c) {
switch (c) {
case '+':
case '|':
case '(':
case '{':
case '[':
case '*':
case '?':
case '$':
case '/':
case '^':
return true;
}
return escapedAttrVal.toString(); return false;
} }
private String getSuffixRegex(String attributeValue) { private static boolean hasIndexQuerySpecialChar(String attributeValue) {
return ".*" + attributeValue; for (int i = 0; i < attributeValue.length(); i++) {
if (isIndexQuerySpecialChar(attributeValue.charAt(i))) {
return true;
}
}
return false;
} }
private String getLikeRegex(String attributeValue) { return ".*" + attributeValue + ".*"; } private static boolean isIndexQuerySpecialChar(char c) {
switch (c) {
case '+':
case '-':
case '&':
case '|':
case '!':
case '(':
case ')':
case '{':
case '}':
case '[':
case ']':
case '^':
case '"':
case '~':
case '*':
case '?':
case ':':
case '/':
case '#':
case '$':
case '%':
case '@':
case '=':
return true;
}
return false;
}
protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result> idxQueryResult, List<AtlasVertex> vertices) { protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result> idxQueryResult, List<AtlasVertex> vertices) {
if (idxQueryResult != null) { if (idxQueryResult != null) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment