Commit c9303742 by Ashutosh Mestry

ATLAS-2229: Improved date handling.

parent 212417c9
@@ -28,18 +28,17 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class GremlinQueryComposer {
private static final Logger LOG = LoggerFactory.getLogger(GremlinQueryComposer.class);
private final String DATE_FORMAT_ISO8601_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
private final int DEFAULT_QUERY_RESULT_LIMIT = 25;
private final int DEFAULT_QUERY_RESULT_OFFSET = 0;
@@ -51,19 +50,19 @@ public class GremlinQueryComposer {
private int providedLimit = DEFAULT_QUERY_RESULT_LIMIT;
private int providedOffset = DEFAULT_QUERY_RESULT_OFFSET;
private Context context;
private final DateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT_ISO8601_FORMAT);
@Inject
public GremlinQueryComposer(AtlasTypeRegistry typeRegistry, final AtlasDSL.QueryMetadata qmd) {
isNestedQuery = false;
lookup = new RegistryBasedLookup(errorList, typeRegistry);
this.context = new Context(errorList, lookup);
queryMetadata = qmd;
public GremlinQueryComposer(Lookup registryLookup, final AtlasDSL.QueryMetadata qmd, boolean isNestedQuery) {
this.isNestedQuery = isNestedQuery;
this.lookup = registryLookup;
this.queryMetadata = qmd;
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
init();
}
public GremlinQueryComposer(AtlasTypeRegistry typeRegistry, final AtlasDSL.QueryMetadata queryMetadata, int limit, int offset) {
this(typeRegistry, queryMetadata);
public GremlinQueryComposer(AtlasTypeRegistry typeRegistry, final AtlasDSL.QueryMetadata qmd, int limit, int offset) {
this(new RegistryBasedLookup(typeRegistry), qmd, false);
this.context = new Context(errorList, lookup);
providedLimit = limit;
providedOffset = offset < 0 ? DEFAULT_QUERY_RESULT_OFFSET : offset;
@@ -75,14 +74,7 @@ public class GremlinQueryComposer {
this.lookup = lookup;
this.context = context;
this.queryMetadata = qmd;
init();
}
public GremlinQueryComposer(Lookup registryLookup, final AtlasDSL.QueryMetadata qmd, boolean isNestedQuery) {
this.isNestedQuery = isNestedQuery;
this.lookup = registryLookup;
this.queryMetadata = qmd;
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
init();
}
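
For orientation, a hypothetical call site under the consolidated constructors (class names are taken from the diff above; `typeRegistry` and `queryMetadata` are assumed to be in scope, and this fragment assumes the Atlas classes on the classpath):

```java
// Top-level entry point: delegates to the Lookup-based constructor,
// then installs its own Context (see the 4-arg constructor in this hunk).
GremlinQueryComposer composer =
        new GremlinQueryComposer(typeRegistry, queryMetadata, 25 /* limit */, 0 /* offset */);

// The underlying wiring it delegates to; RegistryBasedLookup now builds
// its own error list instead of borrowing the caller's.
GremlinQueryComposer inner =
        new GremlinQueryComposer(new RegistryBasedLookup(typeRegistry), queryMetadata, false /* isNestedQuery */);
```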
@@ -267,9 +259,15 @@
close();
String items[] = getFormattedClauses(queryMetadata.needTransformation());
return queryMetadata.needTransformation() ?
String s = queryMetadata.needTransformation() ?
getTransformedClauses(items) :
String.join(".", items);
if(LOG.isDebugEnabled()) {
LOG.debug("Gremlin: {}", s);
}
return s;
}
private String getTransformedClauses(String[] items) {
@@ -315,10 +313,6 @@
}
}
private static String quoted(String rhs) {
return IdentifierHelper.getQuoted(rhs);
}
private void addSelectTransformation(final SelectClauseComposer selectClauseComposer) {
GremlinClause fn;
if (selectClauseComposer.isSelectNoop) {
@@ -343,14 +337,26 @@
private String addQuotesIfNecessary(String rhs) {
if(IdentifierHelper.isTrueOrFalse(rhs)) return rhs;
if(IdentifierHelper.isQuoted(rhs)) return rhs;
return quoted(rhs);
return IdentifierHelper.getQuoted(rhs);
}
private String parseDate(String rhs) {
String s = IdentifierHelper.isQuoted(rhs) ?
IdentifierHelper.removeQuotes(rhs) :
rhs;
return String.format("'%d'", DateTime.parse(s).getMillis());
return String.format("'%d'", getDateFormat(s));
}
public long getDateFormat(String s) {
try {
return dateFormat.parse(s).getTime();
} catch (ParseException ex) {
errorList.add(ex.getMessage());
}
return -1;
}
private void close() {
......
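
To make the new parse path concrete: `parseDate` strips surrounding quotes, and `getDateFormat` (which, despite its name, returns epoch milliseconds) parses against the UTC ISO-8601 format, collecting failures in `errorList` and returning -1 instead of throwing. Note that `SimpleDateFormat` is not thread-safe, so the instance field assumes each composer is used from a single thread. A self-contained sketch of the same logic using only the JDK (not the Atlas class itself):

```java
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.TimeZone;

public class DateParseSketch {
    private static final List<String> errorList = new ArrayList<>();
    private static final DateFormat DATE_FORMAT =
            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
    static {
        DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); // interpret timestamps as UTC
    }

    // Mirrors getDateFormat above: epoch millis on success, -1 with a recorded error on failure.
    static long toEpochMillis(String s) {
        try {
            return DATE_FORMAT.parse(s).getTime();
        } catch (ParseException ex) {
            errorList.add(ex.getMessage());
        }
        return -1;
    }

    public static void main(String[] args) {
        System.out.println(toEpochMillis("2017-12-12T02:35:58.440Z")); // 1513046158440
        System.out.println(toEpochMillis("not-a-date"));               // -1
        System.out.println(errorList);                                 // one parse error recorded
    }
}
```

This matches the updated `whereClauseWithDateCompare` test below, which expects `eq('1513046158440')` for that same timestamp.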
@@ -24,14 +24,15 @@ import org.apache.atlas.model.typedef.AtlasBaseTypeDef;
import org.apache.atlas.type.*;
import org.apache.commons.lang.StringUtils;
import java.util.ArrayList;
import java.util.List;
class RegistryBasedLookup implements Lookup {
private final List<String> errorList;
private final AtlasTypeRegistry typeRegistry;
public RegistryBasedLookup(List<String> errorList, AtlasTypeRegistry typeRegistry) {
this.errorList = errorList;
public RegistryBasedLookup(AtlasTypeRegistry typeRegistry) {
this.errorList = new ArrayList<>();
this.typeRegistry = typeRegistry;
}
......
@@ -29,11 +29,8 @@ import org.apache.atlas.query.antlr4.AtlasDSLParser;
import org.apache.atlas.runner.LocalSolrRunner;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;
import org.testng.ITestContext;
import org.testng.annotations.*;
import javax.inject.Inject;
@@ -507,268 +504,6 @@ public class DSLQueriesTest extends BasicTestSetup {
assertSearchResult(searchResult, expected);
}
@Test
public void classification() {
String expected = "g.V().has('__traitNames', within('PII')).limit(25).toList()";
verify("PII", expected);
}
@Test
public void dimension() {
String expected = "g.V().has('__typeName', 'hive_table').has('__traitNames', within('Dimension')).limit(25).toList()";
verify("hive_table isa Dimension", expected);
verify("hive_table is Dimension", expected);
verify("hive_table where hive_table is Dimension", expected);
// Not supported, since it requires two singleSrcQuery clauses: one for the isa clause, another for the where clause
// verify("Table isa Dimension where name = 'sales'",
// "g.V().has('__typeName', 'Table').has('__traitNames', within('Dimension')).has('Table.name', eq('sales')).limit(25).toList()");
}
@Test
public void fromDB() {
verify("from hive_db", "g.V().has('__typeName', 'hive_db').limit(25).toList()");
verify("from hive_db limit 10", "g.V().has('__typeName', 'hive_db').limit(10).toList()");
verify("hive_db limit 10", "g.V().has('__typeName', 'hive_db').limit(10).toList()");
}
@Test
public void hasName() {
String expected = "g.V().has('__typeName', within('DataSet','hive_column_lineage','Infrastructure','Asset','Process','hive_table','hive_column','hive_db','hive_process')).has('Asset.name').limit(25).toList()";
verify("Asset has name", expected);
verify("Asset where Asset has name", expected);
}
@Test
public void simpleAlias() {
verify("Asset as a", "g.V().has('__typeName', within('DataSet','hive_column_lineage','Infrastructure','Asset','Process','hive_table','hive_column','hive_db','hive_process')).as('a').limit(25).toList()");
}
@Test
public void selectQueries() {
String expected = "def f(r){ t=[['d.name','d.owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); }; " +
"f(g.V().has('__typeName', within('DataSet','hive_column_lineage','Infrastructure','Asset','Process','hive_table','hive_column','hive_db','hive_process')).as('d')";
verify("Asset as d select d.name, d.owner", expected + ".limit(25).toList())");
verify("Asset as d select d.name, d.owner limit 10", expected + ".limit(10).toList())");
}
@Test
public void tableSelectColumns() {
String exMain = "g.V().has('__typeName', 'hive_table').out('__hive_table.columns').limit(10).toList()";
String exSel = "def f(r){ r };";
String exSel1 = "def f(r){ t=[['db.name']]; r.each({t.add([it.value('Asset.name')])}); t.unique(); };";
verify("hive_table select columns limit 10", getExpected(exSel, exMain));
String exMain2 = "g.V().has('__typeName', 'hive_table').out('__hive_table.db').limit(25).toList()";
verify("hive_table select db", getExpected(exSel, exMain2));
String exMain3 = "g.V().has('__typeName', 'hive_table').out('__hive_table.db').limit(25).toList()";
verify("hive_table select db.name", getExpected(exSel1, exMain3));
}
@Test(enabled = false)
public void SelectLimit() {
verify("from hive_db limit 5", "g.V().has('__typeName', 'hive_db').limit(5).toList()");
verify("from hive_db limit 5 offset 2", "g.V().has('__typeName', 'hive_db').range(2, 7).toList()");
}
@Test
public void orderBy() {
String expected = "g.V().has('__typeName', 'hive_db').order().by('Asset.name').limit(25).toList()";
verify("hive_db orderby name", expected);
verify("from hive_db orderby name", expected);
verify("from hive_db as d orderby d.owner limit 3", "g.V().has('__typeName', 'hive_db').as('d').order().by('Asset.owner').limit(3).toList()");
verify("hive_db as d orderby d.owner limit 3", "g.V().has('__typeName', 'hive_db').as('d').order().by('Asset.owner').limit(3).toList()");
String exSel = "def f(r){ t=[['d.name','d.owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };";
String exMain = "g.V().has('__typeName', 'hive_db').as('d').order().by('Asset.owner').limit(25).toList()";
verify("hive_db as d select d.name, d.owner orderby (d.owner) limit 25", getExpected(exSel, exMain));
String exMain2 = "g.V().has('__typeName', 'hive_table').and(__.has('Asset.name', eq(\"sales_fact\")),__.has('hive_table.createTime', gt('1388563200000'))).order().by('hive_table.createTime').limit(25).toList()";
String exSel2 = "def f(r){ t=[['_col_0','_col_1']]; r.each({t.add([it.value('Asset.name'),it.value('hive_table.createTime')])}); t.unique(); };";
verify("hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby _col_1",
getExpected(exSel2, exMain2));
}
@Test
public void fromDBOrderByNameDesc() {
verify("from hive_db orderby name DESC", "g.V().has('__typeName', 'hive_db').order().by('Asset.name', decr).limit(25).toList()");
}
@Test
public void fromDBSelect() {
String expected = "def f(r){ t=[['Asset.name','Asset.owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };" +
" f(g.V().has('__typeName', 'hive_db').limit(25).toList())";
verify("from hive_db select Asset.name, Asset.owner", expected);
expected = "def f(r){ t=[['min(name)','max(owner)']]; " +
"def min=r.min({it.value('Asset.name')}).value('Asset.name'); " +
"def max=r.max({it.value('Asset.owner')}).value('Asset.owner'); " +
"t.add([min,max]); t;}; " +
"f(g.V().has('__typeName', 'hive_db').limit(25).toList())";
verify("hive_db select min(name), max(owner)", expected);
expected = "def f(r){ t=[['owner','min(name)','max(owner)']]; " +
"def min=r.min({it.value('Asset.name')}).value('Asset.name'); " +
"def max=r.max({it.value('Asset.owner')}).value('Asset.owner'); " +
"r.each({t.add([it.value('Asset.owner'),min,max])}); t.unique(); }; " +
"f(g.V().has('__typeName', 'hive_db').limit(25).toList())";
verify("hive_db select owner, min(name), max(owner)", expected);
}
@Test
public void fromDBGroupBy() {
verify("from hive_db groupby (Asset.owner)", "g.V().has('__typeName', 'hive_db').group().by('Asset.owner').limit(25).toList()");
}
@Test
public void whereClauseTextContains() {
String exMain = "g.V().has('__typeName', 'hive_db').has('Asset.name', eq(\"Reporting\")).limit(25).toList()";
String exSel = "def f(r){ t=[['name','owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };";
verify("from hive_db where name = \"Reporting\" select name, owner", getExpected(exSel, exMain));
verify("from hive_db where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain));
verify("hive_table where Asset.name like \"Tab*\"",
"g.V().has('__typeName', 'hive_table').has('Asset.name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).limit(25).toList()");
verify("from hive_table where (db.name = \"Reporting\")",
"g.V().has('__typeName', 'hive_table').out('__hive_table.db').has('Asset.name', eq(\"Reporting\")).dedup().in('__hive_table.db').limit(25).toList()");
}
@Test
public void whereClauseWithAsTextContains() {
String exSel = "def f(r){ t=[['t.name','t.owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };";
String exMain = "g.V().has('__typeName', 'hive_table').as('t').has('Asset.name', eq(\"testtable_1\")).limit(25).toList()";
verify("hive_table as t where t.name = \"testtable_1\" select t.name, t.owner)", getExpected(exSel, exMain));
}
@Test
public void whereClauseWithDateCompare() {
String exSel = "def f(r){ t=[['t.name','t.owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };";
String exMain = "g.V().has('__typeName', 'hive_table').as('t').has('hive_table.createTime', eq('1513046158440')).limit(25).toList()";
verify("hive_table as t where t.createTime = \"2017-12-12T02:35:58.440Z\" select t.name, t.owner)", getExpected(exSel, exMain));
}
@Test
public void subType() {
String exMain = "g.V().has('__typeName', within('DataSet','hive_column_lineage','Infrastructure','Asset','Process','hive_table','hive_column','hive_db','hive_process')).limit(25).toList()";
String exSel = "def f(r){ t=[['name','owner']]; r.each({t.add([it.value('Asset.name'),it.value('Asset.owner')])}); t.unique(); };";
verify("Asset select name, owner", getExpected(exSel, exMain));
}
@Test
public void TraitWithSpace() {
verify("`Log Data`", "g.V().has('__traitNames', within('Log Data')).limit(25).toList()");
}
@Test
public void nestedQueries() {
verify("hive_table where name=\"sales_fact\" or name=\"testtable_1\"",
"g.V().has('__typeName', 'hive_table').or(__.has('Asset.name', eq(\"sales_fact\")),__.has('Asset.name', eq(\"testtable_1\"))).limit(25).toList()");
verify("hive_table where name=\"sales_fact\" and name=\"testtable_1\"",
"g.V().has('__typeName', 'hive_table').and(__.has('Asset.name', eq(\"sales_fact\")),__.has('Asset.name', eq(\"testtable_1\"))).limit(25).toList()");
verify("hive_table where name=\"sales_fact\" or name=\"testtable_1\" or name=\"testtable_2\"",
"g.V().has('__typeName', 'hive_table')" +
".or(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.has('Asset.name', eq(\"testtable_1\"))," +
"__.has('Asset.name', eq(\"testtable_2\"))" +
").limit(25).toList()");
verify("hive_table where name=\"sales_fact\" and name=\"testtable_1\" and name=\"testtable_2\"",
"g.V().has('__typeName', 'hive_table')" +
".and(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.has('Asset.name', eq(\"testtable_1\"))," +
"__.has('Asset.name', eq(\"testtable_2\"))" +
").limit(25).toList()");
verify("hive_table where (name=\"sales_fact\" or name=\"testtable_1\") and name=\"testtable_2\"",
"g.V().has('__typeName', 'hive_table')" +
".and(" +
"__.or(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.has('Asset.name', eq(\"testtable_1\"))" +
")," +
"__.has('Asset.name', eq(\"testtable_2\")))" +
".limit(25).toList()");
verify("hive_table where name=\"sales_fact\" or (name=\"testtable_1\" and name=\"testtable_2\")",
"g.V().has('__typeName', 'hive_table')" +
".or(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.and(" +
"__.has('Asset.name', eq(\"testtable_1\"))," +
"__.has('Asset.name', eq(\"testtable_2\")))" +
")" +
".limit(25).toList()");
verify("hive_table where name=\"sales_fact\" or name=\"testtable_1\" and name=\"testtable_2\"",
"g.V().has('__typeName', 'hive_table')" +
".and(" +
"__.or(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.has('Asset.name', eq(\"testtable_1\"))" +
")," +
"__.has('Asset.name', eq(\"testtable_2\")))" +
".limit(25).toList()");
verify("hive_table where (name=\"sales_fact\" and owner=\"Joe\") OR (name=\"sales_fact_daily_mv\" and owner=\"Joe BI\")",
"g.V().has('__typeName', 'hive_table')" +
".or(" +
"__.and(" +
"__.has('Asset.name', eq(\"sales_fact\"))," +
"__.has('Asset.owner', eq(\"Joe\"))" +
")," +
"__.and(" +
"__.has('Asset.name', eq(\"sales_fact_daily_mv\"))," +
"__.has('Asset.owner', eq(\"Joe BI\"))" +
"))" +
".limit(25).toList()");
verify("hive_table where owner=\"hdfs\" or ((name=\"testtable_1\" or name=\"testtable_2\") and createTime < \"2017-12-12T02:35:58.440Z\")",
"g.V().has('__typeName', 'hive_table').or(__.has('Asset.owner', eq(\"hdfs\")),__.and(__.or(__.has('Asset.name', eq(\"testtable_1\")),__.has('Asset.name', eq(\"testtable_2\"))),__.has('hive_table.createTime', lt('1513046158440')))).limit(25).toList()");
verify("hive_table where hive_table.name='Reporting' and hive_table.createTime < '2017-12-12T02:35:58.440Z'",
"g.V().has('__typeName', 'hive_table').and(__.has('Asset.name', eq('Reporting')),__.has('hive_table.createTime', lt('1513046158440'))).limit(25).toList()");
verify("hive_table where db.name='Sales' and db.clusterName='cl1'",
"g.V().has('__typeName', 'hive_table').and(__.out('__hive_table.db').has('Asset.name', eq('Sales')).dedup().in('__hive_table.db'),__.out('__hive_table.db').has('hive_db.clusterName', eq('cl1')).dedup().in('__hive_table.db')).limit(25).toList()");
}
private void verify(String dsl, String expectedGremlin) {
AtlasDSLParser.QueryContext queryContext = getParsedQuery(dsl);
String actualGremlin = getGremlinQuery(queryContext);
assertEquals(actualGremlin, expectedGremlin);
}
private String getExpected(String select, String main) {
return String.format("%s f(%s)", select, main);
}
private AtlasDSLParser.QueryContext getParsedQuery(String query) {
AtlasDSLParser.QueryContext queryContext = null;
InputStream stream = new ByteArrayInputStream(query.getBytes());
AtlasDSLLexer lexer = null;
try {
lexer = new AtlasDSLLexer(CharStreams.fromStream(stream));
} catch (IOException e) {
assertTrue(false);
}
TokenStream inputTokenStream = new CommonTokenStream(lexer);
AtlasDSLParser parser = new AtlasDSLParser(inputTokenStream);
queryContext = parser.query();
assertNotNull(queryContext);
assertNull(queryContext.exception);
return queryContext;
}
private String getGremlinQuery(AtlasDSLParser.QueryContext queryContext) {
GremlinQueryComposer gremlinQueryComposer = new GremlinQueryComposer(typeRegistry, new AtlasDSL.QueryMetadata(queryContext));
DSLVisitor qv = new DSLVisitor(gremlinQueryComposer);
qv.visit(queryContext);
String s = gremlinQueryComposer.get();
assertTrue(StringUtils.isNotEmpty(s));
return s;
}
private void assertSearchResult(AtlasSearchResult searchResult, int expected) {
assertNotNull(searchResult);
if(expected == 0) {
......
@@ -137,9 +137,9 @@ public class GremlinQueryComposerTest {
String exMain = "g.V().has('__typeName', 'DB').as('d').order().by('DB.owner').limit(25).toList()";
verify("DB as d select d.name, d.owner orderby (d.owner) limit 25", getExpected(exSel, exMain));
String exMain2 = "g.V().has('__typeName', 'Table').and(__.has('Table.name', eq(\"sales_fact\")),__.has('Table.createTime', gt('1388563200000'))).order().by('Table.createTime').limit(25).toList()";
String exMain2 = "g.V().has('__typeName', 'Table').and(__.has('Table.name', eq(\"sales_fact\")),__.has('Table.createTime', gt('1418265300000'))).order().by('Table.createTime').limit(25).toList()";
String exSel2 = "def f(r){ t=[['_col_0','_col_1']]; r.each({t.add([it.value('Table.name'),it.value('Table.createTime')])}); t.unique(); }";
verify("Table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby _col_1",
verify("Table where (name = \"sales_fact\" and createTime > \"2014-12-11T02:35:0.0Z\" ) select name as _col_0, createTime as _col_1 orderby _col_1",
getExpected(exSel2, exMain2));
}
......
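
The updated expectation is straightforward to sanity-check: under the same UTC ISO-8601 format, the test literal "2014-12-11T02:35:0.0Z" parses to 1418265300000 ms, the value asserted by the new `gt('1418265300000')` clause. A minimal check:

```java
import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class ExpectedMillisCheck {
    public static void main(String[] args) throws Exception {
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
        df.setTimeZone(TimeZone.getTimeZone("UTC"));
        // Numeric fields parse greedily up to the next literal, so "0.0" is read as ss=0, SSS=0.
        System.out.println(df.parse("2014-12-11T02:35:0.0Z").getTime()); // 1418265300000
    }
}
```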