// Cassandra CQL schema for the metadata store, the quality-rule stores
// (attributes/metrics, policies, data processing) and the data store.
//
// Every section creates its keyspace and then switches to it with USE, so
// the unqualified CREATE TYPE / CREATE TABLE statements that follow are
// created inside that keyspace.
//
// NOTE(review): the element types of all collection columns (list<...>,
// set<...>, map<...>) and of the frozen UDT columns are reconstructed from
// the column names and the identifier types used elsewhere in this file.
// Confirm each one against the application code before applying.

//Metadata store
CREATE KEYSPACE metadata
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1'};
USE metadata;

CREATE TABLE metadata (
    set_ text,
    position_in_set int,
    group text,
    creation_time timestamp, // timestamp for each metadata item

    //Navigational metadata
    tags list<text>, // NOTE(review): element type assumed to be text
    dataset_location text,

    //Process metadata
    data_source text,
    data_source_type text,
    proc_function text,
    proc_app text,

    //Descriptive metadata
    meaning text,
    purpose text,
    identifier text,
    language text,
    content_description text,
    creator text,
    place_location text,
    content_type text,
    content_format text,
    data_size text,
    required_sw text,
    decision_point text,

    //Quality metadata
    quality_param_1 decimal,
    quality_param1_description text,
    quality_param_2 decimal,
    quality_param2_description text,
    quality_param_3 decimal,
    quality_param3_description text,
    quality_param_4 decimal,
    quality_param4_description text,
    quality_param_5 decimal,
    quality_param5_description text,
    quality_param_6 decimal,
    quality_param6_description text,
    decision_point_required boolean,

    //Admin metadata
    data_provider text,
    licenses text,
    access_rights text,
    copyright text,
    data_privacy text,
    data_storage_indicator text,

    // Partitioned by identifier; rows within a partition are clustered by creation_time.
    PRIMARY KEY (identifier, creation_time)
);

// Per-data-item quality values; shares (identifier, creation_time) with the
// metadata table's key and adds dataitem_id as a further clustering column.
CREATE TABLE metadata_dataitems (
    identifier text,
    creation_time timestamp,
    dataitem_id bigint,

    //Quality attributes
    quality_param_1 decimal,
    quality_param1_description text,
    quality_param_2 decimal,
    quality_param2_description text,
    quality_param_3 decimal,
    quality_param3_description text,

    PRIMARY KEY (identifier, creation_time, dataitem_id)
);

//Quality rules: quality attributes and metrics
CREATE KEYSPACE quality_attributes
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1'};
USE quality_attributes;

CREATE TYPE range (
    min decimal,
    max decimal
);

CREATE TABLE quality_metric (
    identifier bigint,
    description text,
    purpose text,
    target text,
    applicability text,
    // User-defined types nested in a collection must be frozen.
    value_range list<frozen<range>>,
    acceptable_range list<frozen<range>>,
    format text,
    formula text,
    PRIMARY KEY (identifier)
);

CREATE TABLE quality_attribute (
    identifier bigint,
    name text,
    data_source_type text,
    target text,
    value decimal,
    // NOTE(review): assumed quality_metric.identifier -> weight; confirm key/value types.
    quality_metric_weights map<bigint, decimal>,
    quality_data_processing_id bigint,
    PRIMARY KEY (identifier)
);

//Quality rules: quality policies
CREATE KEYSPACE quality_policies
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1'};
USE quality_policies;

//Common structures
// User-defined types are scoped to a keyspace, so the types used by the
// policy tables below are declared inside quality_policies.
CREATE TYPE range (
    min decimal,
    max decimal
);

CREATE TYPE time_range (
    start_time timestamp,
    end_time timestamp
);

CREATE TABLE organizational_quality_policy (
    identifier bigint,
    data_source_type text,
    metadata_id text,
    // NOTE(review): assumed to hold quality_attribute identifiers (bigint); confirm.
    quality_attributes set<bigint>,
    filtering_policy bigint,
    PRIMARY KEY (identifier)
);

CREATE TABLE filtering_policy (
    identifier bigint,
    description text,
    // NOTE(review): key assumed to be a quality_attribute identifier (bigint); confirm.
    quality_attribute_ranges map<bigint, frozen<range>>,
    PRIMARY KEY (identifier)
);

CREATE TABLE search_filtering_policy (
    identifier bigint,
    description text,
    // NOTE(review): key assumed to be a quality_attribute identifier (bigint); confirm.
    quality_attribute_ranges map<bigint, frozen<range>>,
    metadata_id text,
    timerange frozen<time_range>,
    include_data boolean,
    PRIMARY KEY (identifier)
);

CREATE TABLE profile (
    decision_point text,
    // NOTE(review): assumed to hold organizational_quality_policy identifiers (bigint); confirm.
    organizational_quality_policies set<bigint>,
    PRIMARY KEY (decision_point)
);

//Quality rules: quality data processing
CREATE KEYSPACE quality_processing
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1'};
USE quality_processing;

CREATE TABLE quality_data_processing (
    identifier bigint,
    // NOTE(review): parameter element type assumed to be text; confirm.
    configuration_parameters list<text>,
    processing_parameters list<text>,
    quality_data_processing_identifier bigint,
    PRIMARY KEY (identifier)
);

CREATE TABLE supported_quality_data_processing (
    identifier bigint,
    input_data_type text,
    output_data_type text,
    processing_type text,
    // NOTE(review): assumed to hold quality_attribute identifiers (bigint); confirm.
    quality_attributes list<bigint>,
    data_processing_tool text,
    PRIMARY KEY (identifier)
);

//Data store
CREATE KEYSPACE data_store
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : '1'};
USE data_store;

//data store for small data items (such as tweets)
CREATE TABLE data_items (
    identifier text,
    creation_time timestamp,
    dataitem_id bigint,
    data blob,
    text_data text,
    PRIMARY KEY (identifier, creation_time, dataitem_id)
);