Skip to content

Commit

Permalink
Add filter_event_attributes method
Browse files Browse the repository at this point in the history
`filter_event_attributes` method can be used to override data before
sending to BigQuery. Some types of fields, like json or array are not
covered by whitelists.
  • Loading branch information
slawosz committed Dec 17, 2024
1 parent 706595e commit 00f4497
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 5 deletions.
6 changes: 5 additions & 1 deletion lib/dfe/analytics/entities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,17 @@ module Entities
end
end

def filter_event_attributes(data)
data
end

def send_event(type, data)
return unless DfE::Analytics.enabled?

event = DfE::Analytics::Event.new
.with_type(type)
.with_entity_table_name(self.class.table_name)
.with_data(data)
.with_data(filter_event_attributes(data))
.with_tags(event_tags)
.with_request_uuid(RequestLocals.fetch(:dfe_analytics_request_id) { nil })

Expand Down
2 changes: 1 addition & 1 deletion lib/dfe/analytics/load_entity_batch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def build_event(record, table_name, entity_tag)
.with_type('import_entity')
.with_entity_table_name(table_name)
.with_tags([entity_tag])
.with_data(DfE::Analytics.extract_model_attributes(record))
.with_data(record.filter_event_attributes(DfE::Analytics.extract_model_attributes(record)))
end
end
end
Expand Down
86 changes: 86 additions & 0 deletions spec/dfe/analytics/entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,24 @@
end
end

with_model :Setting do
table do |t|
t.json :settings_values
end

model do
include DfE::Analytics::Entities # needs to be explicit

def filter_event_attributes(data)
allowed_attributes = %w[foo]
filtered_data = data[:data]['settings_values'].slice(*allowed_attributes)
data[:data]['settings_values'] = filtered_data

data
end
end
end

before do
stub_const('Teacher', Class.new(Candidate))
allow(DfE::Analytics::SendEvents).to receive(:perform_later)
Expand Down Expand Up @@ -122,6 +140,28 @@
})])
end
end

context 'overriding model attributes' do
before do
allow(DfE::Analytics).to receive(:allowlist).and_return({
Setting.table_name.to_sym => ['settings_values']
})
end

it 'sends a filtered entity’s fields to BQ' do
Setting.create(settings_values: { foo: 'bar', field_with_pii: 'foo@bar.com' })

expect(DfE::Analytics::SendEvents).to have_received(:perform_later)
.with([a_hash_including({
'entity_table_name' => Setting.table_name,
'event_type' => 'create_entity',
'data' => [
# {"key"=>"settings_values", "value"=>["{\"foo\":\"bar\",\"field_with_pii\":\"foo@bar.com\"}"]}
{ 'key' => 'settings_values', 'value' => ['{"foo":"bar"}'] }
]
})])
end
end
end

context 'when no fields are specified in the analytics file' do
Expand Down Expand Up @@ -194,6 +234,29 @@
end
end

context 'overriding model attributes' do
before do
allow(DfE::Analytics).to receive(:allowlist).and_return({
Setting.table_name.to_sym => ['settings_values']
})
end

it 'sends a filtered entity’s fields to BQ' do
s = Setting.create(settings_values: { foo: 'bar', field_with_pii: 'foo@bar.com' })
s.update(settings_values: { foo: 'baz', field_with_pii: 'foo@bar.com' })

expect(DfE::Analytics::SendEvents).to have_received(:perform_later)
.with([a_hash_including({
'entity_table_name' => Setting.table_name,
'event_type' => 'update_entity',
'data' => [
# {"key"=>"settings_values", "value"=>["{\"foo\":\"baz\",\"field_with_pii\":\"foo@bar.com\"}"]}
{ 'key' => 'settings_values', 'value' => ['{"foo":"baz"}'] }
]
})])
end
end

context 'when no fields are specified in the analytics file' do
let(:allowlist_fields) { [] }

Expand Down Expand Up @@ -253,6 +316,29 @@
})])
end

context 'overriding model attributes' do
before do
allow(DfE::Analytics).to receive(:allowlist).and_return({
Setting.table_name.to_sym => ['settings_values']
})
end

it 'sends a filtered entity’s fields to BQ' do
s = Setting.create(settings_values: { foo: 'bar', field_with_pii: 'foo@bar.com' })
s.destroy

expect(DfE::Analytics::SendEvents).to have_received(:perform_later)
.with([a_hash_including({
'entity_table_name' => Setting.table_name,
'event_type' => 'delete_entity',
'data' => [
# {"key"=>"settings_values", "value"=>["{\"foo\":\"bar\",\"field_with_pii\":\"foo@bar.com\"}"]}
{ 'key' => 'settings_values', 'value' => ['{"foo":"bar"}'] }
]
})])
end
end

context 'when fields are specified in the analytics and hidden_pii file' do
let(:allowlist_fields) { %w[email_address dob] }
let(:hidden_pii_fields) { %w[dob] }
Expand Down
43 changes: 43 additions & 0 deletions spec/dfe/analytics/load_entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
end
end

with_model :Setting do
table do |t|
t.json :settings_values
end

model do
include DfE::Analytics::Entities # needs to be explicit

def filter_event_attributes(data)
allowed_attributes = %w[foo]
filtered_data = data[:data]['settings_values'].slice(*allowed_attributes)
data[:data]['settings_values'] = filtered_data

data
end
end
end

with_model :ModelWithCustomPrimaryKey do
table id: false do |t|
t.string :custom_key
Expand Down Expand Up @@ -98,4 +116,29 @@
expect { described_class.new(entity_name: ModelWithoutPrimaryKey.table_name).run(entity_tag: entity_tag) }.not_to raise_error
expect(Rails.logger).to have_received(:info).with(/Not processing #{ModelWithoutPrimaryKey.table_name} as it does not have a primary key/)
end

context 'overriding model attributes' do
before do
allow(DfE::Analytics).to receive(:allowlist).and_return({
Setting.table_name.to_sym => ['settings_values']
})
end

it 'sends a filtered entity’s fields to BQ' do
Setting.create(settings_values: { foo: 'bar', field_with_pii: 'foo@bar.com' })
described_class.new(entity_name: Setting.table_name).run(entity_tag: entity_tag)

# import process
expect(DfE::Analytics::SendEvents).to have_received(:perform_now).once do |payload|
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message

expect(payload.first['data']).to eq(
[{ 'key' => 'settings_values', 'value' => ['{"foo":"bar"}'] }]
)
end
end
end
end
8 changes: 5 additions & 3 deletions spec/dfe/analytics/load_entity_batch_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
allow(DfE::Analytics::SendEvents).to receive(:perform_now)

allow(DfE::Analytics).to receive(:allowlist).and_return({
Candidate.table_name.to_sym => %w[email_address]
Candidate.table_name.to_sym => %w[id dob email_address]
})

DfE::Analytics.initialize!
end

describe '#perform' do
Expand Down Expand Up @@ -49,7 +51,7 @@
it 'doesn’t split a batch unless it has to' do
c = Candidate.create(email_address: '12345678910')
c2 = Candidate.create(email_address: '12345678910')
stub_const('DfE::Analytics::LoadEntityBatch::BQ_BATCH_MAX_BYTES', 550)
stub_const('DfE::Analytics::LoadEntityBatch::BQ_BATCH_MAX_BYTES', 600)

described_class.perform_now('Candidate', [c.id, c2.id], entity_tag)

Expand Down Expand Up @@ -103,7 +105,7 @@
c = Candidate.create(email_address: '12345678910', dob: '12072000')
c2 = Candidate.create(email_address: '12345678910', dob: '12072000')

stub_const('DfE::Analytics::LoadEntityBatch::BQ_BATCH_MAX_BYTES', 300)
stub_const('DfE::Analytics::LoadEntityBatch::BQ_BATCH_MAX_BYTES', 500)

described_class.perform_now('Candidate', [c.id, c2.id], entity_tag)

Expand Down

0 comments on commit 00f4497

Please # to comment.