From 18f4c4b2d85568ca7a7cdf4da80f384dcdf1eae1 Mon Sep 17 00:00:00 2001 From: Marcello Barnaba Date: Tue, 4 Dec 2012 15:51:27 +0100 Subject: [PATCH] Ditch CTEs in favour of sub-selects --- README.md | 79 ++++++++++++++++++----------- lib/chrono_model.rb | 7 --- lib/chrono_model/patches.rb | 85 ++++---------------------------- lib/chrono_model/time_gate.rb | 8 ++- lib/chrono_model/time_machine.rb | 27 ++++++---- 5 files changed, 83 insertions(+), 123 deletions(-) diff --git a/README.md b/README.md index 393c01bb..01578b63 100644 --- a/README.md +++ b/README.md @@ -28,17 +28,16 @@ structure by e.g. keeping the view rules in sync or dropping/recreating it when migrations. A schema dumper is available as well. Data extraction at a single point in time and even `JOIN`s between temporal and non-temporal data -is implemented using -[Common Table Expressions](http://www.postgresql.org/docs/9.0/static/queries-with.html) -(WITH queries) and a `WHERE date >= valid_from AND date < valid_to` clause, generated automatically -by the provided `TimeMachine` module to be included in your models. +is implemented using sub-selects and a `WHERE` generated by the provided `TimeMachine` module to +be included in your models. -Optimal temporal timestamps indexing is provided using GiST spatial search. +The `WHERE` is optimized using a spatial GiST index in which time is represented represented by +boxes and the filtering is done using the overlapping (`&&`) geometry operator. All timestamps are (forcibly) stored in the UTC time zone, bypassing the `AR::Base.config.default_timezone` setting. -See [README.sql](https://github.com/ifad/chronomodel/blob/master/README.sql) for the plain SQL +See [README.sql](https://github.com/ifad/chronomodel/blob/master/README.sql) for the plain SQL defining this temporal schema for a single table. @@ -121,9 +120,17 @@ will make an `as_of` class method available to your model. E.g.: Will execute: - WITH countries AS ( - SELECT * FROM history.countries WHERE #{1.year.ago.utc} >= valid_from AND #{1.year.ago.utc} < valid_to - ) SELECT * FROM countries + SELECT "countries".* FROM ( + SELECT "history"."countries".* FROM "history"."countries" + WHERE box( + point( date_part( 'epoch', '#{1.year.ago.utc}'::timestamp ), 0 ), + point( date_part( 'epoch', '#{1.year.ago.utc}'::timestamp ), 0 ) + ) && + box( + point( date_part( 'epoch', "history"."addresses"."valid_from" ), 0 ), + point( date_part( 'epoch', "history"."addresses"."valid_to" ), 0 ), + ) + ) AS "countries" This work on associations using temporal extensions as well: @@ -131,25 +138,32 @@ This work on associations using temporal extensions as well: Will execute: - WITH countries AS ( - SELECT * FROM history.countries WHERE #{1.year.ago.utc} >= valid_from AND #{1.year.ago.utc} < valid_to - ) SELECT * FROM countries LIMIT 1 + # ... countries history query ... + LIMIT 1 + + SELECT * FROM ( + SELECT "history"."compositions".* FROM "history"."compositions" + WHERE box( + point( date_part( 'epoch', '#{above_timestamp}'::timestamp ), 0 ), + point( date_part( 'epoch', '#{above_timestamp}'::timestamp ), 0 ) + ) && + box( + point( date_part( 'epoch', "history"."addresses"."valid_from" ), 0 ), + point( date_part( 'epoch', "history"."addresses"."valid_to" ), 0 ), + ) + ) AS "compositions" WHERE country_id = X - WITH compositions AS ( - SELECT * FROM history.countries WHERE #{above_timestamp} >= valid_from AND #{above_timestamp} < valid_to - ) SELECT * FROM compositions WHERE country_id = X - And `.joins` works as well: Country.as_of(1.month.ago).joins(:compositions) - + Will execute: - WITH countries AS ( - SELECT * FROM history.countries WHERE #{1.year.ago.utc} >= valid_from AND #{1.year.ago.utc} < valid_to - ), compositions AS ( - SELECT * FROM history.countries WHERE #{above_timestamp} >= valid_from AND #{above_timestamp} < valid_to - ) SELECT * FROM countries INNER JOIN countries ON compositions.country_id = countries.id + SELECT "countries".* FROM ( + # .. countries history query .. + ) AS "countries" INNER JOIN ( + # .. compositions history query .. + ) AS "compositions" ON compositions.country_id = countries.id More methods are provided, see the [TimeMachine](https://github.com/ifad/chronomodel/blob/master/lib/chrono_model/time_machine.rb) source @@ -168,16 +182,13 @@ them in your output, use `rake VERBOSE=true`. * `.includes` still doesn't work, but it'll fixed soon. - * Some monkeypatching has been necessary both to `ActiveRecord::Relation` and - to `Arel::Visitors::ToSql` to fix a bug with `WITH` queries generation. This - will be reported to the upstream with a pull request after extensive testing. + * The history queries are very verbose, they should be factored out in a + per-table stored procedure. * The migration statements extension is implemented using a Man-in-the-middle class that inherits from the PostgreSQL adapter, and that relies on some - private APIs. This should be made more maintainable, maybe by implementing - an extension framework for connection adapters. This library will (clearly) - never be merged into Rails, as it is against its principle of treating the - SQL database as a dummy data store. + private APIs. This should be made more maintainable, maybe by requiring + the use of `adapter: chronomodel` or `adapter: chrono_postgresql`. * The schema dumper is WAY TOO hacky. @@ -185,6 +196,16 @@ them in your output, use `rake VERBOSE=true`. [currently](http://archives.postgresql.org/pgsql-hackers/2012-08/msg01094.php) no way to identify a subtransaction belonging to the current transaction. + * The choice of using subqueries instead of [Common Table Expressions](http://www.postgresql.org/docs/9.0/static/queries-with.html) + was dictated by the fact that CTEs [currently acts as an optimization + fence](http://archives.postgresql.org/pgsql-hackers/2012-09/msg00700.php). + If it will be possible [to opt-out of the + fence](http://archives.postgresql.org/pgsql-hackers/2012-10/msg00024.php) + in the future, they will be probably be used again as they were [in the + past](https://github.com/ifad/chronomodel/commit/39ab32a), because the + resulting queries are much more readable, and do not inhibit using .from() + from ARel. + ## Contributing diff --git a/lib/chrono_model.rb b/lib/chrono_model.rb index 38bc2bc2..1c624da9 100644 --- a/lib/chrono_model.rb +++ b/lib/chrono_model.rb @@ -26,11 +26,4 @@ class Error < ActiveRecord::ActiveRecordError #:nodoc: # We need to override the "scoped" method on AR::Association for temporal # associations to work as well ActiveRecord::Associations::Association = ChronoModel::Patches::Association - - # This implements correct WITH syntax on PostgreSQL - Arel::Visitors::PostgreSQL = ChronoModel::Patches::Visitor - - # This adds .with support to ActiveRecord::Relation - ActiveRecord::Relation.instance_eval { include ChronoModel::Patches::QueryMethods } - ActiveRecord::Base.extend ChronoModel::Patches::Querying end diff --git a/lib/chrono_model/patches.rb b/lib/chrono_model/patches.rb index 71e8be37..ec61e3d0 100644 --- a/lib/chrono_model/patches.rb +++ b/lib/chrono_model/patches.rb @@ -13,96 +13,31 @@ module Patches # class Association < ActiveRecord::Associations::Association - # Add temporal Common Table Expressions (WITH queries) to the resulting - # scope, checking whether either the association class or the through - # association one are ChronoModels. + # If the association class or the through association are ChronoModels, + # then fetches the records from a virtual table using a subquery scoped + # to a specific timestamp. def scoped - return super unless _chrono_record? - - ctes = {} + scoped = super + return scoped unless _chrono_record? klass = reflection.options[:polymorphic] ? owner.public_send(reflection.foreign_type).constantize : reflection.klass - if klass.chrono? - ctes.update _chrono_ctes_for(klass) - end - - if respond_to?(:through_reflection) && through_reflection.klass.chrono? - ctes.update _chrono_ctes_for(through_reflection.klass) + history = if klass.chrono? + klass.history + elsif respond_to?(:through_reflection) && through_reflection.klass.chrono? + through_reflection.klass.history end - scoped = super - ctes.each {|table, cte| scoped = scoped.with(table, cte) } - return scoped.readonly + return scoped.readonly.from(history.virtual_table_at(owner.as_of_time)) end private - def _chrono_ctes_for(klass) - klass.as_of(owner.as_of_time).with_values - end - def _chrono_record? owner.respond_to?(:as_of_time) && owner.as_of_time.present? end end - # Adds the WITH queries (Common Table Expressions) support to - # ActiveRecord::Relation. - # - # \name is the CTE you want - # \value can be a plain SQL query or another AR::Relation - # - # Example: - # - # Post.with('posts', - # Post.from('history.posts'). - # where('? BETWEEN valid_from AND valid_to', 1.month.ago) - # ).where(:author_id => 1) - # - # yields: - # - # WITH posts AS ( - # SELECT * FROM history.posts WHERE ... BETWEEN valid_from AND valid_to - # ) SELECT * FROM posts - # - # PG Documentation: - # http://www.postgresql.org/docs/9.0/static/queries-with.html - # - module QueryMethods - attr_accessor :with_values - - def with(name, value) - clone.tap do |relation| - relation.with_values ||= {} - value = value.to_sql if value.respond_to? :to_sql - relation.with_values[name] = value - end - end - - def build_arel - super.tap {|arel| arel.with with_values if with_values.present? } - end - end - - module Querying - delegate :with, :to => :scoped - end - - # Fixes ARel's WITH visitor method with the correct SQL syntax - # - # FIXME: the .children.first is messy. This should be properly - # fixed in ARel. - # - class Visitor < Arel::Visitors::PostgreSQL - def visit_Arel_Nodes_With o - values = o.children.first.map do |name, value| - [name, ' AS (', value.is_a?(String) ? value : visit(value), ')'].join - end - "WITH #{values.join ', '}" - end - end - end end diff --git a/lib/chrono_model/time_gate.rb b/lib/chrono_model/time_gate.rb index 10138dc4..a59f42aa 100644 --- a/lib/chrono_model/time_gate.rb +++ b/lib/chrono_model/time_gate.rb @@ -9,8 +9,12 @@ module TimeGate module ClassMethods def as_of(time) time = Conversions.time_to_utc_string(time.utc) if time.kind_of? Time - as_of = scoped.with(table_name, - select(%[ #{quoted_table_name}.*, #{connection.quote(time)} AS "as_of_time"])) + + virtual_table = select(%[ + #{quoted_table_name}.*, #{connection.quote(time)} AS "as_of_time"] + ).to_sql + + as_of = scoped.from("(#{virtual_table}) #{quoted_table_name}") as_of.instance_variable_set(:@temporal, time) diff --git a/lib/chrono_model/time_machine.rb b/lib/chrono_model/time_machine.rb index 1686fa2e..08aa4e80 100644 --- a/lib/chrono_model/time_machine.rb +++ b/lib/chrono_model/time_machine.rb @@ -219,10 +219,7 @@ module HistoryMethods # Fetches as of +time+ records. # def as_of(time, scope = nil) - time = Conversions.time_to_utc_string(time.utc) if time.kind_of? Time - - as_of = superclass.unscoped.readonly. - with(superclass.table_name, at(time)) + as_of = superclass.unscoped.readonly.from(virtual_table_at(time)) # Add default scopes back if we're passed nil or a # specific scope, because we're .unscopeing above. @@ -242,9 +239,18 @@ def as_of(time, scope = nil) return as_of end + def virtual_table_at(time, name = nil) + name = name ? connection.quote_table_name(name) : + superclass.quoted_table_name + + "(#{at(time).to_sql}) #{name}" + end + # Fetches history record at the given time # def at(time) + time = Conversions.time_to_utc_string(time.utc) if time.kind_of? Time + from, to = quoted_history_fields unscoped. select("#{quoted_table_name}.*, '#{time}' AS as_of_time"). @@ -346,12 +352,13 @@ module QueryMethods def build_arel super.tap do |arel| - # Extract joined tables and add emporal WITH if appropriate - arel.join_sources.map {|j| - j.to_sql =~ /JOIN "([\w-]+)"(?:\s+"([\w-]+)")? ON/ && [$1, $2] - }.compact.each do |table, alias_| - next unless (model = TimeMachine.chrono_models[table]) - with(alias_ || table, model.history.at(@temporal)) + arel.join_sources.each do |join| + model = TimeMachine.chrono_models[join.left.table_name] + next unless model + + join.left = Arel::Nodes::SqlLiteral.new( + model.history.virtual_table_at(@temporal, join.left.table_alias || join.left.table_name) + ) end if @temporal end