Commit 8253653b by Ashutosh Mestry

ATLAS-2353: Fix for ordering of elements when using select with groupBy

parent c746a050
......@@ -158,7 +158,7 @@ public class AtlasDSL {
}
public boolean needTransformation() {
return (hasGroupBy && hasSelect && hasOrderBy) || (hasGroupBy && hasOrderBy) || hasSelect;
return (hasGroupBy && hasSelect && hasOrderBy) || hasSelect;
}
}
}
......@@ -115,6 +115,7 @@ public class DSLVisitor extends AtlasDSLParserBaseVisitor<Void> {
}
selectClauseComposer.setItems(items);
selectClauseComposer.setAttributes(items);
selectClauseComposer.setLabels(labels);
gremlinQueryComposer.addSelect(selectClauseComposer);
}
......
......@@ -51,7 +51,8 @@ enum GremlinClause {
SELECT_FN("def f(r){ t=[[%s]]; %s r.each({t.add([%s])}); t.unique(); }; "),
SELECT_ONLY_AGG_FN("def f(r){ t=[[%s]]; %s t.add([%s]); t;}; "),
SELECT_ONLY_AGG_GRP_FN("def f(l){ t=[[%s]]; l.get(0).each({k,r -> L:{ %s t.add([%s]); } }); t; }; "),
SELECT_MULTI_ATTR_GRP_FN("def f(l){ t=[[%s]]; l.get(0).each({k,r -> L:{ %s r.each({t.add([%s])}) } }); t.unique(); }; "),
// Optional sorting required here
SELECT_MULTI_ATTR_GRP_FN("def f(l){ h=[[%s]]; t=[]; l.get(0).each({k,r -> L:{ %s r.each({t.add([%s])}) } }); h.plus(t.unique()%s); }; "),
INLINE_ASSIGNMENT("def %s=%s;"),
INLINE_LIST_RANGE("[%s..<%s]"),
INLINE_COUNT("r.size()"),
......@@ -60,6 +61,10 @@ enum GremlinClause {
INLINE_MIN("r.min({it.value('%s')}).value('%s')"),
INLINE_GET_PROPERTY("it.value('%s')"),
INLINE_TRANSFORM_CALL("f(%s)"),
INLINE_DEFAULT_SORT(".sort{a,b -> a[0] <=> b[0]}"),
// idx of the tuple field to be sorted on
INLINE_SORT_ASC(".sort{a,b -> a[%s] <=> b[%s]}"),
INLINE_SORT_DESC(".sort{a,b -> b[%s] <=> a[%s]}"),
V("V()"),
VALUE_MAP("valueMap(%s)");
......
......@@ -23,21 +23,26 @@ import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TimeZone;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class GremlinQueryComposer {
private static final Logger LOG = LoggerFactory.getLogger(GremlinQueryComposer.class);
private final String EMPTY_STRING = "";
private static final String ISO8601_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
private final int DEFAULT_QUERY_RESULT_LIMIT = 25;
private final int DEFAULT_QUERY_RESULT_OFFSET = 0;
......@@ -179,7 +184,7 @@ public class GremlinQueryComposer {
public void addSelect(SelectClauseComposer selectClauseComposer) {
process(selectClauseComposer);
if (!(queryMetadata.hasOrderBy() && queryMetadata.hasGroupBy())) {
addSelectTransformation(selectClauseComposer);
addSelectTransformation(selectClauseComposer, null, false);
}
this.context.setSelectClauseComposer(selectClauseComposer);
}
......@@ -200,6 +205,7 @@ public class GremlinQueryComposer {
context.addAlias(scc.getLabel(i), ia.getQualifiedName());
}
// Update the qualifiedNames and the assignment expressions
if (scc.updateAsApplicable(i, ia.getQualifiedName())) {
continue;
}
......@@ -309,10 +315,7 @@ public class GremlinQueryComposer {
IdentifierHelper.Advice ia = getAdvice(name);
if (queryMetadata.hasSelect() && queryMetadata.hasGroupBy()) {
addOrderByClause(ia.getQualifiedName(), isDesc);
moveToLast(GremlinClause.GROUP_BY);
addSelectTransformation(this.context.selectClauseComposer);
addSelectTransformation(this.context.selectClauseComposer, ia.getQualifiedName(), isDesc);
} else if (queryMetadata.hasGroupBy()) {
addOrderByClause(ia.getQualifiedName(), isDesc);
moveToLast(GremlinClause.GROUP_BY);
......@@ -321,7 +324,9 @@ public class GremlinQueryComposer {
}
}
private void addSelectTransformation(final SelectClauseComposer selectClauseComposer) {
private void addSelectTransformation(final SelectClauseComposer selectClauseComposer,
final String orderByQualifiedAttrName,
final boolean isDesc) {
GremlinClause fn;
if (selectClauseComposer.isSelectNoop) {
fn = GremlinClause.SELECT_NOOP_FN;
......@@ -335,10 +340,25 @@ public class GremlinQueryComposer {
GremlinClause.SELECT_ONLY_AGG_FN :
GremlinClause.SELECT_FN;
}
queryClauses.add(0, fn,
selectClauseComposer.getLabelHeader(),
selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): "",
selectClauseComposer.getItemsString());
if (StringUtils.isEmpty(orderByQualifiedAttrName)) {
queryClauses.add(0, fn,
selectClauseComposer.getLabelHeader(),
selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): EMPTY_STRING,
selectClauseComposer.getItemsString(), EMPTY_STRING);
} else {
int itemIdx = selectClauseComposer.getAttrIndex(orderByQualifiedAttrName);
GremlinClause sortClause = GremlinClause.INLINE_DEFAULT_SORT;
if (itemIdx != -1) {
sortClause = isDesc ? GremlinClause.INLINE_SORT_DESC : GremlinClause.INLINE_SORT_ASC;
}
String idxStr = String.valueOf(itemIdx);
queryClauses.add(0, fn,
selectClauseComposer.getLabelHeader(),
selectClauseComposer.hasAssignmentExpr() ? selectClauseComposer.getAssignmentExprString(): EMPTY_STRING,
selectClauseComposer.getItemsString(),
sortClause.get(idxStr, idxStr)
);
}
queryClauses.add(GremlinClause.INLINE_TRANSFORM_CALL);
}
......
......@@ -24,7 +24,10 @@ import java.util.Map;
import java.util.StringJoiner;
class SelectClauseComposer {
public boolean isSelectNoop;
private String[] labels;
private String[] attributes; // Qualified names
private String[] items;
private Map<String, String> itemAssignmentExprs;
......@@ -33,7 +36,6 @@ class SelectClauseComposer {
private int maxIdx = -1;
private int minIdx = -1;
private int aggCount = 0;
public boolean isSelectNoop;
public SelectClauseComposer() {}
......@@ -42,7 +44,7 @@ class SelectClauseComposer {
}
public void setItems(final String[] items) {
this.items = items;
this.items = Arrays.copyOf(items, items.length);
}
public boolean updateAsApplicable(int currentIndex, String qualifiedName) {
......@@ -59,11 +61,77 @@ class SelectClauseComposer {
} else if (currentIndex == getSumIdx()) {
ret = assign(currentIndex, "sum", qualifiedName,
GremlinClause.INLINE_ASSIGNMENT, GremlinClause.INLINE_SUM);
} else {
attributes[currentIndex] = qualifiedName;
}
return ret;
}
public String[] getAttributes() {
return attributes;
}
public void setAttributes(final String[] attributes) {
this.attributes = Arrays.copyOf(attributes, attributes.length);
}
public boolean assign(int i, String qualifiedName, GremlinClause clause) {
items[i] = clause.get(qualifiedName);
return true;
}
public String[] getLabels() {
return labels;
}
public void setLabels(final String[] labels) {
this.labels = labels;
}
public boolean hasAssignmentExpr() {
return itemAssignmentExprs != null && !itemAssignmentExprs.isEmpty();
}
public boolean onlyAggregators() {
return aggCount > 0 && aggCount == items.length;
}
public String getLabelHeader() {
return getJoinedQuotedStr(getLabels());
}
public String getItemsString() {
return String.join(",", getItems());
}
public String getAssignmentExprString(){
return String.join(" ", itemAssignmentExprs.values());
}
public String getItem(int i) {
return items[i];
}
public String getAttribute(int i) {
return attributes[i];
}
public String getLabel(int i) {
return labels[i];
}
public int getAttrIndex(String attr) {
int ret = -1;
for (int i = 0; i < attributes.length; i++) {
if (attributes[i].equals(attr)) {
ret = i;
break;
}
}
return ret;
}
private boolean assign(String item, String assignExpr) {
if (itemAssignmentExprs == null) {
itemAssignmentExprs = new LinkedHashMap<>();
......@@ -73,11 +141,6 @@ class SelectClauseComposer {
return true;
}
public boolean assign(int i, String qualifiedName, GremlinClause clause) {
items[i] = clause.get(qualifiedName);
return true;
}
private boolean assign(int i, String s, String p1, GremlinClause clause) {
items[i] = s;
return assign(items[i], clause.get(s, p1));
......@@ -125,34 +188,6 @@ class SelectClauseComposer {
aggCount++;
}
public String[] getLabels() {
return labels;
}
public void setLabels(final String[] labels) {
this.labels = labels;
}
public boolean hasAssignmentExpr() {
return itemAssignmentExprs != null && !itemAssignmentExprs.isEmpty();
}
public boolean onlyAggregators() {
return aggCount > 0 && aggCount == items.length;
}
public String getLabelHeader() {
return getJoinedQuotedStr(getLabels());
}
public String getItemsString() {
return String.join(",", getItems());
}
public String getAssignmentExprString(){
return String.join(" ", itemAssignmentExprs.values());
}
private String getJoinedQuotedStr(String[] elements) {
StringJoiner joiner = new StringJoiner(",");
Arrays.stream(elements)
......@@ -160,12 +195,4 @@ class SelectClauseComposer {
.forEach(joiner::add);
return joiner.toString();
}
public String getItem(int i) {
return items[i];
}
public String getLabel(int i) {
return labels[i];
}
}
......@@ -392,13 +392,18 @@ public class DSLQueriesTest extends BasicTestSetup {
.withExpectedValues(1)
.withExpectedValues(1)
.withExpectedValues(1) },
// FIXME
// { "from hive_db groupby (owner, name) select Asset.owner, Asset.name, count()",
// new FieldValueValidator()
// .withFieldNames("Asset.owner", "Asset.name", "count()")
// .withExpectedValues("Jane BI", "Reporting", 1)
// .withExpectedValues("Tim ETL", "Logging", 1)
// .withExpectedValues("John ETL", "Sales", 1) },
{ "from hive_db groupby (owner) select owner, name orderby owner",
new FieldValueValidator()
.withFieldNames("owner", "name")
.withExpectedValues("Jane BI", "Reporting")
.withExpectedValues("John ETL", "Sales")
.withExpectedValues("Tim ETL", "Logging") },
{ "from hive_db groupby (owner) select Asset.owner, Asset.name, count()",
new FieldValueValidator()
.withFieldNames("Asset.owner", "Asset.name", "count()")
.withExpectedValues("Jane BI", "Reporting", 1)
.withExpectedValues("Tim ETL", "Logging", 1)
.withExpectedValues("John ETL", "Sales", 1) },
{ "from hive_db groupby (owner) select count() ",
new FieldValueValidator()
.withFieldNames("count()").
......
......@@ -108,14 +108,16 @@ public class GremlinQueryComposerTest {
public void groupByMin() {
verify("from DB groupby (owner) select min(name) orderby name limit 2",
"def f(l){ t=[['min(name)']]; l.get(0).each({k,r -> L:{ def min=r.min({it.value('DB.name')}).value('DB.name'); t.add([min]); } }); t; }; " +
"f(g.V().has('__typeName', 'DB').order().by('DB.name').group().by('DB.owner').limit(2).toList())");
"f(g.V().has('__typeName', 'DB').group().by('DB.owner').limit(2).toList())");
}
@Test
public void groupByOrderBy() {
verify("Table groupby(owner) select name, owner, clusterName orderby name",
"def f(l){ t=[['name','owner','clusterName']]; l.get(0).each({k,r -> L:{ r.each({t.add([it.value('Table.name'),it.value('Table.owner'),it.value('Table.clusterName')])}) } }); t.unique(); }; " +
"f(g.V().has('__typeName', 'Table').order().by('Table.name').group().by('Table.owner').limit(25).toList())");
"def f(l){ h=[['name','owner','clusterName']]; t=[]; " +
"l.get(0).each({k,r -> L:{ r.each({t.add([it.value('Table.name'),it.value('Table.owner'),it.value('Table.clusterName')])}) } }); " +
"h.plus(t.unique().sort{a,b -> a[0] <=> b[0]}); }; " +
"f(g.V().has('__typeName', 'Table').group().by('Table.owner').limit(25).toList())");
}
@Test
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment