Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 111 additions & 21 deletions app/controllers/concerns/dedupable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,22 @@ def dedupe_index
authorize!
config = dedupe_config
mc = config[:model_class]
name_col = config[:name_column] || :name

groups = mc.all.group_by { |r| r.name.to_s.strip.downcase }
# Eager-load belongs_to associations used by record_extras
eager = mc.reflect_on_all_associations(:belongs_to).map(&:name)
all_records = mc.includes(eager).to_a

groups = all_records.group_by { |r| r.public_send(name_col).to_s.strip.downcase }
@possible_duplicates = groups.select { |_name, records| records.size > 1 }
@records_for_select = mc.order(:name).map { |r| [ r.name, r.id ] }
@records_for_select = all_records.sort_by { |r| r.public_send(name_col).to_s.downcase }.map { |r| [ r.public_send(name_col), r.id ] }

# Pre-compute tagging counts in a single query
join_assoc, _join_incl = dedupe_primary_join(mc)
assoc_reflection = mc.reflect_on_association(join_assoc)
fk = assoc_reflection.foreign_key
@tagging_counts = assoc_reflection.klass.group(fk).count

@dedupe = build_dedupe_vars(config)

render "dedupes/index"
Expand Down Expand Up @@ -43,6 +55,14 @@ def dedupe_preview
@keep_items = @record_to_keep.public_send(join_assoc).includes(join_incl)
@dedupe = build_dedupe_vars(config)

@extra_association_data = (config[:extra_associations] || []).map do |ea|
{
label: ea[:label],
delete_items: @record_to_delete.public_send(ea[:name]),
keep_items: @record_to_keep.public_send(ea[:name])
}
end

render "dedupes/preview"
end

Expand All @@ -56,8 +76,9 @@ def dedupe_update_keep
keep_param_key = "#{mn}_to_keep"

if params[keep_param_key].present?
editable = mc.column_names - %w[id created_at updated_at legacy_id]
record.update!(params.require(keep_param_key).permit(editable))
editable = mc.column_names - %w[id created_at updated_at legacy_id] + rich_text_attribute_names(mc)
non_blank = params.require(keep_param_key).permit(editable).to_h.reject { |_k, v| v.nil? || v == "" }
record.update!(non_blank) if non_blank.any?
end

head :ok
Expand All @@ -72,32 +93,48 @@ def dedupe_execute
config = dedupe_config
mc = config[:model_class]
mn = mc.model_name.singular
name_col = config[:name_column] || :name

record_to_delete = mc.find(params["#{mn}_to_delete_id"])
record_to_keep = mc.find(params["#{mn}_to_keep_id"])

keep_param_key = "#{mn}_to_keep"
if params[keep_param_key].present?
editable = mc.column_names - %w[id created_at updated_at legacy_id]
record_to_keep.update!(params.require(keep_param_key).permit(editable))
end
delete_name = record_to_delete.public_send(name_col)
keep_name = record_to_keep.public_send(name_col)

if respond_to?(:track_event, true)
track_event("dedupe.#{mn}", {
resource_type: mc.name,
resource_id: record_to_keep.id,
deleted_record: record_to_delete.attributes,
kept_record: { id: record_to_keep.id, name: record_to_keep.name },
associations_moved: record_to_delete.public_send(dedupe_primary_join(mc).first).count
})
end
ActiveRecord::Base.transaction do
keep_param_key = "#{mn}_to_keep"
if params[keep_param_key].present?
editable = mc.column_names - %w[id created_at updated_at legacy_id] + rich_text_attribute_names(mc)
non_blank = params.require(keep_param_key).permit(editable).to_h.reject { |_k, v| v.nil? || v == "" }
record_to_keep.update!(non_blank) if non_blank.any?
end

deduper = ModelDeduper.new(model_class: mc, logger: Rails.logger, dry_run: false, min_usage: 0)
deduper.merge(record_to_keep, record_to_delete)
if respond_to?(:track_event, true)
track_event("dedupe.#{mn}", {
resource_type: mc.name,
resource_id: record_to_keep.id,
deleted_record: record_to_delete.attributes,
kept_record: { id: record_to_keep.id, name: keep_name },
associations_moved: record_to_delete.public_send(dedupe_primary_join(mc).first).count
})
end

if (extra_assocs = config[:extra_associations])
extra_assocs.reject { |ea| ea[:display_only] }.each do |ea|
reassign_direct_association(record_to_delete, record_to_keep, ea[:name])
end
# Clear stale association caches so dependent: :destroy
# doesn't cascade-delete already-reassigned records.
record_to_delete.reload
end

deduper = ModelDeduper.new(model_class: mc, logger: Rails.logger, dry_run: false, min_usage: 0)
deduper.merge(record_to_keep, record_to_delete)
end

label = mc.model_name.human.pluralize
redirect_to url_for(action: :index),
notice: "#{label} merged successfully. '#{record_to_delete.name}' was merged into '#{record_to_keep.name}'."
notice: "#{label} merged successfully. '#{delete_name}' was merged into '#{keep_name}'."
rescue ActionPolicy::Unauthorized
raise
rescue StandardError => e
Expand Down Expand Up @@ -133,14 +170,67 @@ def dedupe_primary_join(mc)
[ assoc.name, poly.name ]
end

# Reassign a direct-FK has_many association from one record to another.
#
# For each associated row of +from_record+:
# * if +to_record+ already owns a row with the same values in the join
#   model's *other* belongs_to columns, the row is treated as a duplicate
#   and destroyed;
# * otherwise its foreign key is pointed at +to_record+;
# * if that update is rejected (model validation or DB unique index), the
#   row is destroyed instead, because the keeper has a conflicting row.
#
# @param from_record [ActiveRecord::Base] record whose rows are moved away
# @param to_record [ActiveRecord::Base] record that receives the rows
# @param assoc_name [Symbol] name of a has_many association on the record class
# @raise [ArgumentError] if +assoc_name+ is not an association of the record class
# @return [void]
def reassign_direct_association(from_record, to_record, assoc_name)
  assoc = from_record.class.reflect_on_association(assoc_name)
  raise ArgumentError, "Unknown association #{assoc_name.inspect} on #{from_record.class.name}" unless assoc

  fk = assoc.foreign_key.to_s
  join_class = assoc.klass

  # Collect the other belongs_to FK columns for duplicate detection,
  # keeping only columns that actually exist in the table.
  db_columns = join_class.column_names.to_set
  other_fk_cols = join_class.reflect_on_all_associations(:belongs_to).flat_map do |bt|
    next [] if bt.foreign_key.to_s == fk

    cols = bt.polymorphic? ? [ bt.foreign_type.to_s, bt.foreign_key.to_s ] : [ bt.foreign_key.to_s ]
    cols.select { |c| db_columns.include?(c) }
  end

  # Materialize the list first: rows are moved/destroyed while iterating.
  from_record.public_send(assoc_name).to_a.each do |item|
    if other_fk_cols.any?
      dedup_attrs = other_fk_cols.each_with_object({}) { |col, h| h[col] = item.public_send(col) }
      if to_record.public_send(assoc_name).where(dedup_attrs).exists?
        item.destroy!
        next
      end
    end

    begin
      # Run the update in its own savepoint. This method is called from inside
      # an outer transaction; a DB-level failure (e.g. unique index violation)
      # would otherwise leave that transaction aborted on databases such as
      # PostgreSQL, making the fallback destroy below fail as well.
      join_class.transaction(requires_new: true) { item.update!(fk => to_record.id) }
    rescue ActiveRecord::RecordInvalid, ActiveRecord::RecordNotUnique
      # Model-level validation (e.g., name uniqueness) or a unique index
      # prevents the move; destroy the unmovable record since the keeper
      # has a conflict.
      item.destroy!
    end
  end
end

# Returns attribute names for has_rich_text fields (e.g. ["rhino_objective", "rhino_materials"]).
# Empty array for models without ActionText.
#
# has_rich_text is detected through the model's has_one reflections whose
# class is an Action Text rich-text model; the attribute name is the
# association name with its leading "rich_text_" stripped.
#
# @param mc [Class] model class responding to +reflect_on_all_associations+
# @return [Array<String>] rich-text attribute names
def rich_text_attribute_names(mc)
  # Encrypted rich text (has_rich_text ..., encrypted: true) is backed by a
  # different class than plain rich text; accept both.
  rich_text_classes = %w[ActionText::RichText ActionText::EncryptedRichText]

  mc.reflect_on_all_associations(:has_one).filter_map do |assoc|
    # delete_prefix only strips at the very start of the string, unlike a
    # /^.../ pattern which anchors at the start of any line.
    assoc.name.to_s.delete_prefix("rich_text_") if rich_text_classes.include?(assoc.class_name)
  end
end

def build_dedupe_vars(config)
mc = config[:model_class]
mn = mc.model_name.singular
join_assoc, join_incl = dedupe_primary_join(mc)
opts = config[:belongs_to_options]
name_col = config[:name_column] || :name

{
domain: config[:domain] || mc.model_name.plural.to_sym,
name_column: name_col,
model_label: mc.model_name.human,
model_label_plural: mc.model_name.human.pluralize,
model_name: mn,
Expand Down
25 changes: 24 additions & 1 deletion app/controllers/workshops_controller.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class WorkshopsController < ApplicationController
include AhoyTracking
include AhoyTracking, Dedupable
skip_before_action :authenticate_user!, only: [ :index, :show ]

def index
Expand Down Expand Up @@ -237,6 +237,29 @@ def assign_associations(workshop)
workshop.save!
end

# Dedupe settings for workshops: the model, the column holding the display
# name, select options for belongs_to columns, an extra per-record label, and
# the additional associations shown in the merge preview.
def dedupe_config
  # Polymorphic joins (handled by ModelDeduper during merge, shown here for preview)
  preview_only = {
    sectorable_items: "Sector Tags",
    quotable_item_quotes: "Quote Tags",
    bookmarks: "Bookmarks",
    workshop_logs: "Workshop Logs"
  }.map { |assoc_name, label| { name: assoc_name, label: label, display_only: true } }

  # Direct FK associations (handled by reassign_direct_association)
  direct_fk = {
    associated_resources: "Associated Resources",
    workshop_resources: "Workshop Resources",
    workshop_series_children: "Series Children",
    workshop_series_parents: "Series Parents",
    workshop_variations: "Workshop Variations"
  }.map { |assoc_name, label| { name: assoc_name, label: label } }

  type_options = -> { { "windows_type_id" => WindowsType.order(:name) } }
  type_summary = ->(record) { "Type: #{record.windows_type&.name || 'None'}" }

  {
    model_class: Workshop,
    domain: :workshops,
    name_column: :title,
    belongs_to_options: type_options,
    record_extras: type_summary,
    extra_associations: preview_only + direct_fk
  }
end

# Logs a failed workshop action at error level: the action name, the
# exception class and message, then the backtrace one frame per line.
#
# @param action [String, Symbol] name of the action that failed
# @param error [Exception] the exception to report
def log_workshop_error(action, error)
  # backtrace is nil for an exception that was never raised; Array() turns
  # that into an empty list so logging itself cannot raise.
  trace = Array(error.backtrace).join("\n")
  Rails.logger.error "Workshop #{action} failed: #{error.class} - #{error.message}\n#{trace}"
end
Expand Down
59 changes: 43 additions & 16 deletions app/services/model_deduper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -119,23 +119,50 @@ def merge_join(primary, dupe, join)
type_col = join[:polymorphic_type_column]
id_col = join[:polymorphic_id_column]

existing_taggings = jc
.where(fk => primary.id)
.pluck(type_col, id_col)
.map { |type, id| "#{type}_#{id}" }
.to_set

items_to_move = jc.where(fk => dupe.id)

items_to_move.find_each do |item|
tagging_key = "#{item.public_send(type_col)}_#{item.public_send(id_col)}"

if existing_taggings.include?(tagging_key)
item.destroy!
logger.info " deleted duplicate #{jc.name} #{item.id} (primary already has it)"
if fk == id_col
# Deduplicating the polymorphic side (e.g. Workshop via categorizable_items).
# Use non-polymorphic FK columns for duplicate detection.
other_fk_cols = jc.reflect_on_all_associations(:belongs_to)
.reject(&:polymorphic?)
.map { |a| a.foreign_key.to_s }

existing_keys = if other_fk_cols.any?
jc.where(fk => primary.id)
.pluck(*other_fk_cols)
.map { |vals| Array(vals).join("_") }
.to_set
else
item.update!(fk => primary.id)
logger.info " moved #{jc.name} #{item.id} to primary"
Set.new
end

jc.where(fk => dupe.id).find_each do |item|
key = other_fk_cols.map { |c| item.public_send(c) }.join("_")
if other_fk_cols.any? && existing_keys.include?(key)
item.destroy!
logger.info " deleted duplicate #{jc.name} #{item.id} (primary already has it)"
else
item.update!(fk => primary.id)
logger.info " moved #{jc.name} #{item.id} to primary"
end
end
else
# Normal case: deduplicating the non-polymorphic side (e.g. Category via categorizable_items).
existing_taggings = jc
.where(fk => primary.id)
.pluck(type_col, id_col)
.map { |type, id| "#{type}_#{id}" }
.to_set

jc.where(fk => dupe.id).find_each do |item|
tagging_key = "#{item.public_send(type_col)}_#{item.public_send(id_col)}"

if existing_taggings.include?(tagging_key)
item.destroy!
logger.info " deleted duplicate #{jc.name} #{item.id} (primary already has it)"
else
item.update!(fk => primary.id)
logger.info " moved #{jc.name} #{item.id} to primary"
end
end
end

Expand Down
Loading