Arcspc 1232 replace ead xform #4

Merged · 2 commits · Jul 17, 2024
backend/plugin_init.rb: 64 changes (51 additions, 13 deletions)
@@ -28,6 +28,7 @@ def self.from_resource_id(resource_id)
"Access Restrict.",
"URN",
"Collection Title",
"Originator_Resource",
"Collection Date",
"Collection ID",
"Parent 1",
@@ -63,11 +64,11 @@ def process_row(row)
# e.g.(1, 1a, 1b, 2, 2a, 3, 4, 4a)
def each
DB.open do |db|
-# There are five placeholder positions in this cursed SQL, all of which
+# There are eight placeholder positions in this cursed SQL, all of which
# should be filled with @resource_id - MySQL doesn't support correlated subqueries
# so one can't reference the outer resource ID in the union within the outer query,
# and I don't trust it to push down a limit into the CTE either tbh.
-dataset = db[<<~SQL, *([@resource_id] * 5)]
+dataset = db[<<~SQL, *([@resource_id] * 8)]
WITH RECURSIVE ao AS (SELECT
id,
root_record_id,
@@ -90,7 +91,7 @@ def each
P.level_id,
P.position,
P.parent_id,
-JSON_ARRAY_APPEND(ao.parent_titles, '$', P.title) as parent_titles,
+JSON_ARRAY_INSERT(ao.parent_titles, '$[0]', P.title) as parent_titles,
depth + 1 as depth,
(POWER(10, 20 - (2 * (ao.depth + 1))) * (P.position + 1)) + ao.adjusted_pos as adjusted_pos
FROM archival_object P INNER JOIN ao ON P.parent_id = ao.id)
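
The adjusted_pos expression in this hunk packs each tree level into two decimal digits of a roughly 20-digit sort key, which is what produces the depth-first ordering (1, 1a, 1b, 2, 2a, ...) described in the comment on `each`. A minimal Ruby sketch of the same arithmetic, assuming the anchor row seeds depth 0 equivalently (the helper name is hypothetical):

    def adjusted_pos(parent_key, depth, position)
      # each level claims the two decimal digits just below its parent's digits
      (10 ** (20 - 2 * depth)) * (position + 1) + parent_key
    end

    n1  = adjusted_pos(0, 0, 0)    # node "1"
    n1a = adjusted_pos(n1, 1, 0)   # child "1a"
    n1b = adjusted_pos(n1, 1, 1)   # child "1b"
    n2  = adjusted_pos(0, 0, 1)    # node "2"
    [n2, n1b, n1a, n1].sort == [n1, n1a, n1b, n2]   # => true

Sorting ascending therefore emits each parent immediately before its descendants, good for roughly ten levels of up to 99 siblings each.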
@@ -102,17 +103,18 @@ def each
max(ao_date.end) AS "End Year",
ao.component_id AS "Unit ID",
level.value AS "Level",
-parent_titles->'$[0]' as "Parent 1",
-parent_titles->'$[1]' as "Parent 2",
-parent_titles->'$[2]' as "Parent 3",
-parent_titles->'$[3]' as "Parent 4",
-parent_titles->'$[4]' as "Parent 5",
-parent_titles->'$[5]' as "Parent 6",
group_concat(creators.sort_name SEPARATOR ';') AS "Originator",
+parent_titles->'$[1]' as "Parent 1",
+parent_titles->'$[2]' as "Parent 2",
+parent_titles->'$[3]' as "Parent 3",
+parent_titles->'$[4]' as "Parent 4",
+parent_titles->'$[5]' as "Parent 5",
+parent_titles->'$[6]' as "Parent 6",
tc.indicator AS "Container 1",
sc.indicator_2 AS "Container 2",
sc.indicator_3 AS "Container 3",
group_concat(fv.file_uri SEPARATOR ';') AS "URN",
+group_concat(resource_creators.sort_name SEPARATOR ';') AS "Originator_Resource",
r.title AS "Collection Title",
group_concat(resource_date.expression SEPARATOR '; ') AS "Collection Date",
REGEXP_REPLACE(
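
Swapping JSON_ARRAY_APPEND for JSON_ARRAY_INSERT at '$[0]' turns the accumulator from a push into an unshift, so parent_titles now appears to run nearest-ancestor-first with the record's own title in slot 0; the column reads above move from $[0]..$[5] to $[1]..$[6] to skip that own title, making "Parent 1" the immediate parent. The same idea in plain Ruby (illustrative values only):

    titles = ["Parent", "Root"]   # ancestors accumulated so far, nearest first
    titles.unshift("Self")        # new behaviour: ["Self", "Parent", "Root"]
    titles[1]                     # => "Parent", i.e. "Parent 1" is the immediate parent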
@@ -135,6 +137,33 @@ def each
LEFT JOIN instance_do_link_rlshp idlr ON i.id = idlr.instance_id
LEFT JOIN digital_object do ON do.id = idlr.digital_object_id
LEFT JOIN file_version fv ON fv.digital_object_id = do.id
+LEFT JOIN (SELECT r_p.id, sort_name FROM resource r_p
+JOIN linked_agents_rlshp r_lar_p ON r_lar_p.resource_id = r_p.id
+JOIN agent_person r_ap ON r_lar_p.agent_person_id = r_ap.id
+JOIN name_person r_np ON r_np.agent_person_id = r_ap.id
+JOIN enumeration_value r_agent_role_p ON r_agent_role_p.id = r_lar_p.role_id
+WHERE r_p.id = ?
+AND r_agent_role_p.value IN ('creator', 'source')
+AND r_np.is_display_name = true
+UNION ALL
+SELECT r_f.id, sort_name FROM resource r_f
+JOIN linked_agents_rlshp r_lar_f ON r_lar_f.resource_id = r_f.id
+JOIN agent_family r_af ON r_af.id = r_lar_f.agent_family_id
+JOIN name_family r_nf ON r_nf.agent_family_id = r_af.id
+JOIN enumeration_value r_agent_role_f ON r_agent_role_f.id = r_lar_f.role_id
+WHERE r_f.id = ?
+AND r_agent_role_f.value IN ('creator', 'source')
+AND r_nf.is_display_name = true
+UNION ALL
+SELECT r_ce.id, sort_name FROM resource r_ce
+JOIN linked_agents_rlshp r_lar_ce ON r_lar_ce.resource_id = r_ce.id
+JOIN agent_corporate_entity r_ace ON r_ace.id = r_lar_ce.agent_corporate_entity_id
+JOIN name_corporate_entity r_nce ON r_nce.agent_corporate_entity_id = r_ace.id
+JOIN enumeration_value r_agent_role_ce ON r_agent_role_ce.id = r_lar_ce.role_id
+WHERE r_ce.id = ?
+AND r_agent_role_ce.value IN ('creator', 'source')
+AND r_nce.is_display_name = true
+) resource_creators ON resource_creators.id = r.id
LEFT JOIN (SELECT ao_p.id, sort_name FROM archival_object ao_p
JOIN linked_agents_rlshp lar ON lar.archival_object_id = ao_p.id
JOIN agent_person ap ON ap.id = lar.agent_person_id
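
The new resource_creators derived table above unions resource-level agents of all three types (person, family, corporate entity) with role creator or source, keeping only display names. Each UNION ALL branch carries its own ? bound to @resource_id, which is what takes the placeholder count from five to eight in the binding earlier in the diff:

    dataset = db[<<~SQL, *([@resource_id] * 8)]   # 5 existing ?s + 3 in resource_creators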
@@ -165,17 +194,24 @@ def each
LEFT JOIN note ON note.archival_object_id = ao.id
LEFT JOIN date ao_date ON ao_date.archival_object_id = ao.id
LEFT JOIN date resource_date ON resource_date.resource_id = r.id
-WHERE r.id = ?
-GROUP BY r.id, ao_id, ao.ref_id, ao.component_id, ao.title, ao.parent_titles, ao.adjusted_pos, level, tc.indicator, sc.indicator_2, sc.indicator_3, note.id
-ORDER BY adjusted_pos ASC, ao.id ASC, note.id ASC
+WHERE r.id = ?
+GROUP BY r.id, ao_id, ao.ref_id, ao.component_id, ao.title, ao.parent_titles, ao.adjusted_pos, level, tc.indicator, sc.indicator_2, sc.indicator_3, note.id
+ORDER BY adjusted_pos ASC, ao.id ASC, note.id ASC
SQL
# Iterate over the dataset. Multiple rows with the same AO id are present, due to it
# being infeasible to group notes together, so it's necessary to collect and
# process multiple input_rows into one output row.
current_id = nil # current AO id, add output row to to_dispatch when it changes
current_csv_row = {}
# holds CSV rows until ready to dispatch, emptied after dispatch, contains headers first
-to_dispatch = [CSV_HEADERS.map{|val|%Q|"#{val.to_s}"|}.join(",")]
+to_dispatch = [CSV_HEADERS.map{|val|
+  case val
+  when /Originator_Resource/
+    '"Originator"'
+  else
+    %Q|"#{val.to_s}"|
+  end
+}.join(",")]
Log.debug('Got to dataset.each')
dataset.each do |input_row|
Log.debug("current_id: #{current_id}")
@@ -213,6 +249,8 @@ def each
current_csv_row[:"Access Restrict."] = note['subnotes'].map {|sn| sn['content']}.join(' ')
end
end
+
+
Log.debug("to_dispatch at end of processing: #{to_dispatch}")
end # end dataset.each
# Because we dispatch when ao_id _changes_ the last row is left over to dispatch
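
Because rows arrive ordered by adjusted_pos and note id, this is the usual group-and-flush pattern: fold rows until the AO id changes, emit, and flush the final group after the loop since it never sees a change. In miniature (names hypothetical):

    current_id, current_row = nil, {}
    rows.each do |row|
      if current_id && current_id != row[:ao_id]
        to_dispatch << current_row    # AO id changed: emit the accumulated row
        current_row = {}
      end
      current_id = row[:ao_id]
      current_row.merge!(row)         # fold this row's note fields in
    end
    to_dispatch << current_row if current_id   # dispatch the leftover last row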