# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#+TITLE: Query DSL
#+AUTHOR: Harish Butani
#+EMAIL: hbutani@apache.org
#+LANGUAGE: en
#+INFOJS_OPT: view:showall toc:t ltoc:t mouse:underline path:http://orgmode.org/org-info.js
#+LINK_HOME: http://home.fnal.gov/~neilsen
#+LINK_UP: http://home.fnal.gov/~neilsen/notebook
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://orgmode.org/org-manual.css" />
#+LaTeX_CLASS: smarticle
#+LaTeX_HEADER: \pdfmapfile{/home/neilsen/texmf/fonts/map/dvips/libertine/libertine.map}
#+LaTeX_HEADER: \usepackage[ttscale=.875]{libertine}
#+LaTeX_HEADER: \usepackage{sectsty}
#+LaTeX_HEADER: \sectionfont{\normalfont\scshape}
#+LaTeX_HEADER: \subsectionfont{\normalfont\itshape}
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+OPTIONS: H:2 num:nil toc:nil \n:nil @:t ::t |:t ^:{} _:{} *:t TeX:t LaTeX:t
#+STARTUP: showall
#+OPTIONS: html-postamble:nil

** Example Type Definitions

#+begin_src plantuml :file class_diagram.png
' Class diagram of the example type system: a Trait hierarchy, an Object
' hierarchy (DB, Table, Column) and the LoadProcess / View types that
' reference Tables.
scale 1300 width

note left of Trait : traits are classifications/tags attached to Instances

' Trait and its subtypes. The spelling JdbcAccess matches the trait label
' used in the instance graph later in this document.
class Trait
Trait <|-- JdbcAccess
Trait <|-- PII
Trait <|-- Dimension
Trait <|-- Metric
Trait <|-- ETL

' Object carries the "traits" association and is the supertype of
' DB, Table and Column.
class Object
Object --* Trait : traits >
Object <|-- DB
Object <|-- Table
Object <|-- Column

class DB {
  name : String
  owner : String
}

class StorageDescriptor {
  inputFormat : String
  outputFormat : String
}

class Column {
  name : String
  dataType : String
}

class Table {
  name: String
  db: DB
}

Table -> StorageDescriptor : storageDesc >
Table -> DB : db >
Column *-> StorageDescriptor : storageDesc >

' LoadProcess has "inputTables" and "outputTable" associations to Table.
class LoadProcess {
  name : String
}

LoadProcess -* Table : inputTables >
LoadProcess -> Table : outputTable >

' View has an "inputTables" association to Table.
class View {
  name : String
}

View -* Table : inputTables >
#+end_src

#+CAPTION: ETL and Reporting Scenario Types
#+LABEL: fig:sampleTypeDefs
#+results:
[[file:class_diagram.png]]

** Example Instance Graph

#+begin_src dot :file instanceGraph.png :cmdline -Kdot -Tpng
// Instance graph for the example scenario: a "Sales Database" cluster, a
// "Reporting Database" cluster, and two LoadProcess nodes linking them.
// Edges with style=invis are not drawn; they only influence node placement.
// Dotted edges link source tables to views / load processes and load
// processes to the tables they write.
digraph G {
  //size ="6 6";
  nodesep=.2;
  //rankdir=LR;
  ranksep=.25;
  node [shape=record fontsize=9];
  compound=true;

  subgraph cluster0 {
    style=bold;
    label = "Sales Database"; fontsize=18;
    salesDB[label="DB(sales)"]

    // sales_fact table, its storage, columns and the Metric trait node
    salesFact[label="Table(sales_fact)" style=filled; color="khaki"]
    salesStorage[label="Storage(text,text)"]
    sales_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
    sales_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
    sales_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
    sales_sales[label="sales" shape="circle" style=filled color="peachpuff"]
    sales_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]

    salesFact -> salesDB;
    salesFact -> salesStorage;
    sales_time_id -> salesStorage;
    sales_product_id -> salesStorage;
    sales_customer_id -> salesStorage;
    sales_sales -> salesStorage;
    sales_sales -> sales_sales_metric;

    // product_dim table, its storage, columns and the Dimension trait node
    productDim[label="Table(product_dim)" style=filled; color="khaki"]
    productStorage[label="Storage(text,text)"]
    product_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
    product_product_name[label="product_name" shape="circle" style=filled color="peachpuff"]
    product_brand_name[label="brand_name" shape="circle" style=filled color="peachpuff"]
    product_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]

    productDim -> salesDB;
    productDim -> productStorage;
    product_product_id -> productStorage;
    product_product_name -> productStorage;
    product_brand_name -> productStorage;
    productDim -> product_dimension;

    productDim -> salesFact [style=invis];

    // time_dim table, its storage, columns and the Dimension trait node
    timeDim[label="Table(time_dim)" style=filled; color="khaki"]
    timeStorage[label="Storage(text,text)"]
    time_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
    time_dayOfYear[label="day_of_year" shape="circle" style=filled color="peachpuff"]
    time_weekDay[label="week_day" shape="circle" style=filled color="peachpuff"]
    time_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]

    timeDim -> salesDB;
    timeDim -> timeStorage;
    time_time_id -> timeStorage;
    time_dayOfYear -> timeStorage;
    time_weekDay -> timeStorage;
    timeDim -> time_dimension;

    timeDim -> productDim [style=invis];

    // customer_dim table, its storage, columns and the Dimension / PII trait nodes
    customerDim[label="Table(customer_dim)" style=filled; color="khaki"]
    customerStorage[label="Storage(text,text)"]
    customer_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
    customer_name[label="name" shape="circle" style=filled color="peachpuff"]
    customer_address[label="address" shape="circle" style=filled color="peachpuff"]
    customer_dimension[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
    address_pii[label="PII" style=filled; shape="ellipse" color="turquoise"]

    customerDim -> salesDB;
    customerDim -> customerStorage;
    customer_customer_id -> customerStorage;
    customer_name -> customerStorage;
    customer_address -> customerStorage;
    customerDim -> customer_dimension;
    customer_address -> address_pii;

    customerDim -> timeDim [style=invis];

    //{rank=min; salesDB};
    {rank=min; salesDB};
  };

  subgraph cluster1 {
    style=bold;
    label = "Reporting Database"; fontsize=18;
    reportingDB[label="DB(reporting)"]

    // sales_daily_mv table, its storage, columns and the Metric trait node
    salesFactDaily[label="Table(sales_daily_mv)" style=filled; color="khaki"]
    salesDailyStorage[label="Storage(orc,orc)"]
    salesD_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
    salesD_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
    salesD_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
    salesD_sales[label="sales" shape="circle" style=filled color="peachpuff"]
    salesD_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]

    salesFactDaily -> reportingDB;
    salesFactDaily -> salesDailyStorage;
    salesD_time_id -> salesDailyStorage;
    salesD_product_id -> salesDailyStorage;
    salesD_customer_id -> salesDailyStorage;
    salesD_sales -> salesDailyStorage;
    salesD_sales -> salesD_sales_metric;

    salesFactDaily -> reportingDB [style=invis];

    // product_dim_v view with its Dimension / JdbcAccess trait nodes
    productDimView[label="View(product_dim_v)" style=filled; color="khaki"]
    productDim -> productDimView [style=dotted];
    productDimView_dim[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
    productDimView_jdbc[label="JdbcAccess" style=filled; shape="ellipse" color="turquoise"]
    productDimView -> productDimView_dim;
    productDimView -> productDimView_jdbc;

    productDimView -> salesFactDaily [style=invis];

    // customer_dim_v view with its Dimension / JdbcAccess trait nodes
    customerDimView[label="View(customer_dim_v)" style=filled; color="khaki"]
    customerDim -> customerDimView [style=dotted];
    customerDimView_dim[label="Dimension" style=filled; shape="ellipse" color="turquoise"]
    customerDimView_jdbc[label="JdbcAccess" style=filled; shape="ellipse" color="turquoise"]
    customerDimView -> customerDimView_dim;
    customerDimView -> customerDimView_jdbc;

    customerDimView -> salesFactDaily [style=invis];

    // sales_monthly_mv table, its storage, columns and the Metric trait node
    salesMonthly[label="Table(sales_monthly_mv)" style=filled; color="khaki"]
    salesMonthlyStorage[label="Storage(orc,orc)"]
    salesM_time_id[label="time_id" shape="circle" style=filled color="peachpuff"]
    salesM_product_id[label="product_id" shape="circle" style=filled color="peachpuff"]
    salesM_customer_id[label="customer_id" shape="circle" style=filled color="peachpuff"]
    salesM_sales[label="sales" shape="circle" style=filled color="peachpuff"]
    salesM_sales_metric[label="Metric" style=filled; shape="ellipse" color="turquoise"]

    salesMonthly -> reportingDB;
    salesMonthly -> salesMonthlyStorage;
    salesM_time_id -> salesMonthlyStorage;
    salesM_product_id -> salesMonthlyStorage;
    salesM_customer_id -> salesMonthlyStorage;
    salesM_sales -> salesMonthlyStorage;
    salesM_sales -> salesM_sales_metric;

    salesMonthly -> customerDimView [style=invis];

    {rank=min; reportingDB};
  };

  // loadSalesDaily: dotted edges from sales_fact and time_dim, dotted edge to sales_daily_mv
  loadSalesDaily[label="LoadProcess(loadSalesDaily)" style=filled; color="seagreen"; shape="octagon"]
  loadSalesDaily_etl[label="ETL" style=filled; shape="ellipse" color="turquoise"]
  salesFact -> loadSalesDaily [style=dotted];
  timeDim -> loadSalesDaily [style=dotted];
  loadSalesDaily -> salesFactDaily [style=dotted];
  loadSalesDaily -> loadSalesDaily_etl;

  // loadSalesMonthly: dotted edges from sales_daily_mv and time_dim, dotted edge to sales_monthly_mv
  loadSalesMonthly[label="LoadProcess(loadSalesMonthly)" style=filled; color="seagreen"; shape="octagon"]
  loadSalesMonthly_etl[label="ETL" style=filled; shape="ellipse" color="turquoise"]
  salesFactDaily -> loadSalesMonthly [style=dotted];
  timeDim -> loadSalesMonthly [style=dotted];
  loadSalesMonthly -> salesMonthly [style=dotted];
  loadSalesMonthly -> loadSalesMonthly_etl;
}
#+end_src

#+CAPTION: ETL and Reporting Scenario
#+LABEL: fig:sampleInstanceGraph
#+results:
[[file:instanceGraph.png]]