Skip to content
This repository has been archived by the owner on Nov 15, 2024. It is now read-only.

Commit

Permalink
Merge pull request #625 from StartupsPoleEmploi/fix/office_admin_update
Browse files: browse the repository at this point in the history
PELBB-277: update es and mysql db after office admin changes
  • Loading branch information
lmarvaud authored Aug 16, 2022
2 parents 7526281 + e629047 commit 28808d4
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 95 deletions.
199 changes: 104 additions & 95 deletions labonneboite/scripts/create_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,116 +552,125 @@ def to_iterator(
firstid = key.__get__(rec, key) if rec else None


@timeit
def update_offices_by_sirets(sirets: list,
                             office_to_update: Union[OfficeAdminUpdate, OfficeThirdPartyUpdate]) -> None:
    """
    Update offices after office admin update
    (overload the data provided by the importer).

    For every SIRET in ``sirets`` the matching ``Office`` row is looked up; SIRETs
    without a matching office are skipped. The values carried by ``office_to_update``
    are copied onto the office, and when at least one field actually changed the
    office is saved, its Elasticsearch document is refreshed and its PDF is deleted.

    Args:
        sirets: SIRETs of the offices the update applies to.
        office_to_update: the update row (an instance, not the model class) whose
            ``new_*`` / ``remove_*`` / alternance fields are applied.
    """
    # Fields copied verbatim from the update row whenever it carries a value
    # (``None`` on the update row leaves the office untouched).
    mirrored_attributes = (
        "email_alternance", "phone_alternance", "website_alternance", "hiring", "score_alternance",
        "social_network", "contact_mode",
    )

    for siret in sirets:

        office: Office = Office.query.filter_by(siret=siret).first()
        if not office:
            continue

        is_updated = False

        # Apply changes in DB.
        if office_to_update.new_company_name and office.company_name != office_to_update.new_company_name:
            office.company_name = office_to_update.new_company_name
            is_updated = True
        if office_to_update.new_office_name and office.office_name != office_to_update.new_office_name:
            office.office_name = office_to_update.new_office_name
            is_updated = True

        for attr in mirrored_attributes:
            new_value = getattr(office_to_update, attr)
            if new_value is not None and getattr(office, attr) != new_value:
                setattr(office, attr, new_value)
                is_updated = True

        # Contact fields: a `remove_*` flag blanks the field, otherwise the field
        # is aligned on the `new_*` value whenever the two differ.
        if office_to_update.remove_phone:
            if office.tel != '':
                office.tel = ''
                is_updated = True
        else:
            if office.tel != office_to_update.new_phone:
                office.tel = office_to_update.new_phone
                is_updated = True

        for attr in ["email", "website"]:
            if getattr(office_to_update, f"remove_{attr}"):
                if getattr(office, attr) != '':
                    setattr(office, attr, '')
                    is_updated = True
            else:
                if getattr(office, attr) != getattr(office_to_update, f"new_{attr}"):
                    setattr(office, attr, getattr(office_to_update, f"new_{attr}"))
                    is_updated = True

        # NOTE(review): the nesting of everything below was reconstructed from a
        # diff rendering that had lost its indentation. It is assumed here that the
        # Elasticsearch refresh and the PDF deletion only run when a field changed;
        # confirm against the upstream file.
        if not is_updated:
            continue

        office.save()

        # Apply changes in ElasticSearch.
        body = {
            'doc': {
                'email': office.email,
                'phone': office.tel,
                'website': office.website,
                "score": office.score,
                'flag_alternance': 1 if office.flag_alternance else 0
            }
        }

        scores_by_rome, boosted_romes = get_scores_by_rome_and_boosted_romes(office, office_to_update)
        if scores_by_rome:
            body['doc']['scores_by_rome'] = scores_by_rome
            body['doc']['boosted_romes'] = boosted_romes

        # The update API makes partial updates: existing `scalar` fields are overwritten,
        # but `objects` fields are merged together.
        # https://www.elastic.co/guide/en/elasticsearch/guide/1.x/partial-updates.html
        # However `scores_by_rome` and `boosted_romes` need to be overwritten because they
        # may change over time.
        # To do this, we perform 2 requests: the first one resets `scores_by_rome` and
        # `boosted_romes` and the second one populates them.
        delete_body = {
            'doc': {
                'scores_by_rome': None,
                'boosted_romes': None,
                'scores_alternance_by_rome': None,
                'boosted_alternance_romes': None
            }
        }

        # Unfortunately these cannot easily be bulked :-(
        # The reason is there is no way to tell bulk to ignore missing documents (404)
        # for a partial update. Tried it and failed it on Oct 2017 @vermeer.
        es.Elasticsearch().update(index=settings.ES_INDEX,
                                  doc_type=es.OFFICE_TYPE,
                                  id=siret,
                                  body=delete_body,
                                  params={'ignore': 404})
        es.Elasticsearch().update(index=settings.ES_INDEX,
                                  doc_type=es.OFFICE_TYPE,
                                  id=siret,
                                  body=body,
                                  params={'ignore': 404})

        # Delete the current PDF thus it will be regenerated at the next download attempt.
        pdf_util.delete_file(office)

@timeit
def update_offices(table: Union[Type[OfficeAdminUpdate], Type[OfficeThirdPartyUpdate]]) -> None:
    """
    Replay every update row of ``table`` onto the offices it targets
    (overload the data provided by the importer).

    Args:
        table: the update model class whose rows are applied.
    """
    # A row may list several SIRETs, so nothing guarantees that a given SIRET appears
    # in only one row of `table`. Rows are walked through `to_iterator`, keyed on
    # `table.id`, with the intent that a later row overrides an earlier one for the
    # same SIRET.
    # NOTE(review): the previous comment stated the query is ordered by creation
    # date ASC; the ordering actually applied depends on `to_iterator` -- confirm.
    pending_rows = to_iterator(db_session.query(table), table.id)  # type: ignore
    for update_row in pending_rows:
        target_sirets = table.as_list(update_row.sirets)
        update_offices_by_sirets(target_sirets, update_row)


@timeit
Expand Down
5 changes: 5 additions & 0 deletions labonneboite/web/admin/views/office_admin_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from flask import flash, redirect, request, url_for
from flask import Markup
from flask_admin.contrib.sqla import ModelView
from flask_admin.form import BaseForm
from wtforms import validators
from labonneboite_common.siret import is_siret

Expand All @@ -11,6 +12,7 @@
from labonneboite.web.admin.forms import nospace_filter, phone_validator, strip_filter
from labonneboite.web.admin.utils import datetime_format, AdminModelViewMixin
from labonneboite.conf import settings
from labonneboite.scripts import create_index
from labonneboite.importer.settings import SCORE_ALTERNANCE_REDUCING_MINIMUM_THRESHOLD, \
HIRING_REDUCING_MINIMUM_THRESHOLD

Expand Down Expand Up @@ -523,6 +525,9 @@ def uncheck_checkbox(self, form, checkbox_field_name):
form[checkbox_field_name].description = Markup(
DESCRIPTION_TEMPLATE.format("Case cochée", checkbox_current_description))

def after_model_change(self, form: BaseForm, model: models.OfficeAdminUpdate, is_created: bool) -> None:
    """
    Push a saved office admin update to the offices it targets.

    Presumably the Flask-Admin ``after_model_change`` hook (the enclosing class is
    not visible here -- confirm). The SIRETs are read from the submitted form data
    and handed, together with the model, to ``create_index.update_offices_by_sirets``.

    Args:
        form: the submitted form; only ``form.data['sirets']`` is read.
        model: the update row that was just saved.
        is_created: not used by this implementation.
    """
    submitted_sirets = model.as_list(form.data['sirets'])
    create_index.update_offices_by_sirets(submitted_sirets, model)

def validate_form(self, form):
# Add http:// is missing
form['new_website'].data = format_url(form['new_website'].data)
Expand Down

0 comments on commit 28808d4

Please sign in to comment.