Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

📜 Add tags to mailchimp users #6080

Merged
merged 4 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 0 additions & 40 deletions website/auth.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import json
import logging
import os
import re
import urllib
from functools import wraps

import bcrypt
import boto3
import requests
from botocore.exceptions import ClientError as email_error
from botocore.exceptions import NoCredentialsError
from flask import request, session, redirect
Expand All @@ -20,7 +17,6 @@
from safe_format import safe_format
from utils import is_debug_mode, timems, times
from website import querylog

TOKEN_COOKIE_NAME = config["session"]["cookie_name"]

# A special value in the session, if this is set and we hit a 403 on the
Expand All @@ -36,42 +32,6 @@

env = os.getenv("HEROKU_APP_NAME")

MAILCHIMP_API_URL = None
MAILCHIMP_API_HEADERS = {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

if os.getenv("MAILCHIMP_API_KEY") and os.getenv("MAILCHIMP_AUDIENCE_ID"):
# The domain in the path is the server name, which is contained in the Mailchimp API key
MAILCHIMP_API_URL = (
"https://"
+ os.getenv("MAILCHIMP_API_KEY").split("-")[1]
+ ".api.mailchimp.com/3.0/lists/"
+ os.getenv("MAILCHIMP_AUDIENCE_ID")
)
MAILCHIMP_API_HEADERS = {
"Content-Type": "application/json",
"Authorization": "apikey " + os.getenv("MAILCHIMP_API_KEY"),
}


def mailchimp_subscribe_user(email, country):
# Request is always for teachers as only they can subscribe to newsletters
request_body = {"email_address": email, "status": "subscribed", "tags": [country, "teacher"]}
r = requests.post(MAILCHIMP_API_URL + "/members", headers=MAILCHIMP_API_HEADERS, data=json.dumps(request_body))

subscription_error = None
if r.status_code != 200 and r.status_code != 400:
subscription_error = True
# We can get a 400 if the email is already subscribed to the list. We should ignore this error.
if r.status_code == 400 and not re.match(".*already a list member", r.text):
subscription_error = True
# If there's an error in subscription through the API, we report it to the main email address
if subscription_error:
send_email(
config["email"]["sender"],
"ERROR - Subscription to Hedy newsletter on #",
email,
"<p>" + email + "</p><pre>Status:" + str(r.status_code) + " Body:" + r.text + "</pre>",
)


@querylog.timed
def check_password(password, hash):
Expand Down
16 changes: 2 additions & 14 deletions website/auth_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from safe_format import safe_format
from hedy_content import ALL_LANGUAGES, COUNTRIES
from utils import extract_bcrypt_rounds, is_heroku, is_testing_request, timems, times, remove_class_preview
from website.newsletter import create_subscription
from website.auth import (
MAILCHIMP_API_URL,
RESET_LENGTH,
SESSION_LENGTH,
TOKEN_COOKIE_NAME,
Expand All @@ -20,13 +20,11 @@
forget_current_user,
is_admin,
is_teacher,
mailchimp_subscribe_user,
make_salt,
password_hash,
prepare_user_db,
remember_current_user,
requires_login,
send_email,
send_email_template,
send_localized_email_template,
validate_#_data,
Expand Down Expand Up @@ -180,17 +178,7 @@ def #(self):
user, resp = self.store_new_account(body, body["email"].strip().lower())

if not is_testing_request(request) and "subscribe" in body:
# If we have a Mailchimp API key, we use it to add the subscriber through the API
if MAILCHIMP_API_URL:
mailchimp_subscribe_user(user["email"], body["country"])
# Otherwise, we send an email to notify about the subscription to the main email address
else:
send_email(
config["email"]["sender"],
"Subscription to Hedy newsletter on #",
user["email"],
"<p>" + user["email"] + "</p>",
)
create_subscription(user["email"], body.get("country"))

# We automatically login the user
cookie = make_salt()
Expand Down
2 changes: 2 additions & 0 deletions website/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from website.flask_helpers import render_template
from website.auth import current_user, is_teacher, requires_login, requires_teacher, \
refresh_current_user_from_db, is_second_teacher
from website.newsletter import add_class_created_to_subscription
from .database import Database
from .website_module import WebsiteModule, route

Expand Down Expand Up @@ -50,6 +51,7 @@ def create_class(self, user):
}

self.db.store_class(Class)
add_class_created_to_subscription(user['email'])
Copy link
Collaborator

@rix0rrr rix0rrr Jan 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know how fast or reliable the mailchimp API is, but in case it's slow or buggy: any way we can avoid doing this if it's already been done? (By keeping flags in Dynamo or sth?)

Also are we catching exceptions for low-value API calls like this, in case Mailchimp is down? (I think we're better off not setting a label than crashing the page)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit of a bummer this feels weird to be part of database.py, no?

Otherwise that would seem like a much better place for all these repetitive calls...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The calls to Mailchimp are defenseless at this point - no exception handling, retries, timeouts, and they are blocking the response while they do not need to. I don't think retries are needed in this case - I confirmed this with the POs. We definitely need exception handling (so that we do not fail the whole call) and timeouts (perhaps maximum 5 seconds?). I am not sure about making the call non-blocking. This could be a fire-and-forget call but I am on the fence on the cost vs benefit. What do you think?

I did not think of storing the data in the DB to reduce the Mailchimp calls. These operations are not frequent and if we have timeouts + exception handling, would it be necessary?

I was tempted to put this in the database.py but it truly does not belong there semantically. I am also a bit afraid moving this to the database.py file could lead to potential bugs in the future because other maintainers would probably not expect it either.

Copy link
Collaborator

@rix0rrr rix0rrr Jan 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

other maintainers would probably not expect it either.

Yeah that's extremely fair. And I also hate to introduce another domain modeling layer just for the occasional call that needs to do both database and something else.

As for the calls themselves: I think tossing them onto a background thread and catching and logging errors is probably a nice middle ground. This should not be a huge lift; we just need to pay attention that the HTTP calls we are doing do not access mutable global state, and the library is thread-safe itself as well.

Something like:

def fire_and_forget(f):
  def run_this_and_catch_errors():
    try:
      f()
    except Exception:
      logger.exception('Exception in background thread')

  t = threading.Thread(target=run_this_and_catch_errors, daemon=True)
  t.start()

We have some threading code in there already, to deal with logging to S3 (and, quite anciently, to something called JSONBin). You could have a look there as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the pointers. I have seen the threading code but tbh I was a bit hesitant to spawn threads like that since they could go out of hand. Since the calls for creating or updating a subscription are not a lot, probably this will be ok and this is just me overthinking. So, let's go this way.

Btw I thought nowadays people use asyncio and I have no experience with it, so this is what I meant by cost of implementation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the calls for creating or updating a subscription are not a lot, probably this will be ok and this is just me overthinking

That's what I'm assuming. If we're at all concerned, we should probably use this: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor, set it to a reasonable number of workers (could be 1), and just send jobs to that (that we never wait for). Maybe that's just the safer default anyway...

Btw I thought nowadays people use asyncio and I have no experience with it, so this is what I meant by cost of implementation.

async io is quite a trend... and the benefits are also that it is trendy, not that it is better. Or at least, I'm not convinced that it is in common scenario's.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check that I understand your suggestion. If we swap the thread.start() with creating a new ThreadPoolExecutor on every request, that too can go out of hand. So probably you mean that we should create a global ThreadPoolExecutor on app startup and use it whenever we need to run and not wait for a function. Is this correct? If this is the case though, based on the docs, a ThreadPoolExecutor does not seem like the right thing to use:

All threads enqueued to ThreadPoolExecutor will be joined before the interpreter can exit. Note that the exit handler which does this is executed before any exit handlers added using atexit. This means exceptions in the main thread must be caught and handled in order to signal threads to exit gracefully. For this reason, it is recommended that ThreadPoolExecutor not be used for long-running tasks.

I did not realize asyncio is coroutines. I'd also not go for that in our case.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t think that docstring precludes using it, but the individual threads are probably also fine for our use case

response = {"id": Class["id"]}
return make_response(response, 200)

Expand Down
6 changes: 6 additions & 0 deletions website/for_teachers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from website.server_types import SortedAdventure
from website.flask_helpers import render_template
from website.newsletter import add_class_customized_to_subscription
from website.auth import (
is_admin,
is_teacher,
Expand Down Expand Up @@ -898,6 +899,7 @@ def add_adventure(self, user, level):
customizations["other_settings"].remove("hide_parsons")

self.db.update_class_customizations(customizations)
add_class_customized_to_subscription(user['email'])
available_adventures = self.get_unused_adventures(adventures, teacher_adventures, adventure_names)

return render_partial('customize-class/partial-sortable-adventures.html',
Expand Down Expand Up @@ -935,6 +937,7 @@ def remove_adventure_from_class(self, user):
is_command_adventure=adventure_id in hedy_content.KEYWORDS_ADVENTURES)
adventures[int(level)].remove(sorted_adventure)
self.db.update_class_customizations(customizations)
add_class_customized_to_subscription(user['email'])
available_adventures = self.get_unused_adventures(adventures, teacher_adventures, adventure_names)

return render_partial('customize-class/partial-sortable-adventures.html',
Expand Down Expand Up @@ -978,6 +981,7 @@ def sort_adventures_in_class(self, user):
self.reorder_adventures(adventures[int(level)], from_sorted_adv_class=True)
self.reorder_adventures(customizations['sorted_adventures'][level])
self.db.update_class_customizations(customizations)
add_class_customized_to_subscription(user['email'])

return render_partial('customize-class/partial-sortable-adventures.html',
level=level,
Expand Down Expand Up @@ -1355,6 +1359,7 @@ def update_customizations(self, user, class_id):
}

self.db.update_class_customizations(customizations)
add_class_customized_to_subscription(user['email'])
response = {"success": gettext("class_customize_success")}

return make_response(response, 200)
Expand Down Expand Up @@ -1780,6 +1785,7 @@ def add_adventure_to_class_level(self, user, class_id, adventure_id, level, remo

self.reorder_adventures(customizations['sorted_adventures'][level])
self.db.update_class_customizations(customizations)
add_class_customized_to_subscription(user['email'])

@route("/create-adventure/", methods=["POST"])
@route("/create-adventure/<class_id>", methods=["POST"])
Expand Down
175 changes: 175 additions & 0 deletions website/newsletter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import os
import json
import hashlib
import requests
import logging
import threading
from config import config
from functools import wraps
from hedy_content import COUNTRIES
from website.auth import send_email

logger = logging.getLogger(__name__)

MAILCHIMP_API_URL = None
MAILCHIMP_API_HEADERS = {}
MAILCHIMP_TIMEOUT_SECONDS = 15
if os.getenv("MAILCHIMP_API_KEY") and os.getenv("MAILCHIMP_AUDIENCE_ID"):
# The domain in the path is the server name, which is contained in the Mailchimp API key
MAILCHIMP_API_URL = (
"https://"
+ os.getenv("MAILCHIMP_API_KEY").split("-")[1]
+ ".api.mailchimp.com/3.0/lists/"
+ os.getenv("MAILCHIMP_AUDIENCE_ID")
)
MAILCHIMP_API_HEADERS = {
"Content-Type": "application/json",
"Authorization": "apikey " + os.getenv("MAILCHIMP_API_KEY"),
}


def run_if_mailchimp_config_present(f):
""" Decorator that runs a particular Mailchimp-dependent function only if there are credentials available. """
@wraps(f)
def inner(*args, **kws):
if MAILCHIMP_API_URL:
f(*args, **kws)

return inner


def fire_and_forget(f):
def run():
try:
f()
except Exception as e:
logger.exception(f'Exception in background thread: {e}')

threading.Thread(target=run, daemon=True).start()


@run_if_mailchimp_config_present
def create_subscription(email, country):
""" Subscribes the user to the newsletter. Currently, users can subscribe to the newsletter only on # and
only if they are creating a teacher account. """
def create():
tags = [country, MailchimpTag.TEACHER]
create_mailchimp_subscriber(email, tags)
fire_and_forget(create)


@run_if_mailchimp_config_present
def update_subscription(current_email, new_email, new_country):
def update():
""" Updates the newsletter subscription when the user changes their email or/and their country """
response = get_mailchimp_subscriber(current_email)
if response and response.status_code == 200:
current_tags = response.json().get('tags', [])
if new_email != current_email:
# If user is subscribed, we remove the old email from the list and add the new one
new_tags = [t.get('name') for t in current_tags if t.get('name') not in COUNTRIES] + [new_country]
successfully_created = create_mailchimp_subscriber(new_email, new_tags)
if successfully_created:
delete_mailchimp_subscriber(current_email)
else:
# If the user did not change their email, check if the country needs to be updated
tags_to_update = get_country_tag_changes(current_tags, new_country)
if tags_to_update:
update_mailchimp_tags(current_email, tags_to_update)

fire_and_forget(update)


def add_class_created_to_subscription(email):
create_subscription_event(email, MailchimpTag.CREATED_CLASS)


def add_class_customized_to_subscription(email):
create_subscription_event(email, MailchimpTag.CUSTOMIZED_CLASS)


@run_if_mailchimp_config_present
def create_subscription_event(email, tag):
""" When certain events occur, e.g. a newsletter subscriber creates or customizes a class, these events
should be reflected in their subscription, so that we can send them relevant content """
def create():
r = get_mailchimp_subscriber(email)
if r.status_code == 200:
current_tags = r.json().get('tags', [])
if not any([t for t in current_tags if t.get('name') == tag]):
new_tags = current_tags + [to_mailchimp_tag(tag)]
update_mailchimp_tags(email, new_tags)
fire_and_forget(create)


def get_country_tag_changes(current_tags, country):
""" Returns the necessary alterations to the tags of the newsletter subscriber when they change their country.
The old country tag, if existing, should be removed, and the new country, if existing, should be added. """
current_country_tags = [t.get('name') for t in current_tags if t.get('name') in COUNTRIES]

if country in current_country_tags:
return []

changes = [to_mailchimp_tag(t, active=False) for t in current_country_tags]
if country:
changes.append(to_mailchimp_tag(country))
return changes


def to_mailchimp_tag(tag, active=True):
# https://mailchimp.com/developer/marketing/api/list-member-tags/
status = 'active' if active else 'inactive'
return {'name': tag, 'status': status}


class MailchimpTag:
TEACHER = 'teacher'
CREATED_CLASS = "created_class"
CUSTOMIZED_CLASS = "customized_class"


def create_mailchimp_subscriber(email, tag_names):
tag_names = [t for t in tag_names if t] # the country can be None, so filter it
request_body = {"email_address": email, "status": "subscribed", "tags": tag_names}
request_path = MAILCHIMP_API_URL + "/members/"
r = requests.post(request_path, headers=MAILCHIMP_API_HEADERS, data=json.dumps(request_body))

subscription_error = None
if r.status_code != 200 and r.status_code != 400:
subscription_error = True
# We can get a 400 if the email is already subscribed to the list. We should ignore this error.
if r.status_code == 400 and "already a list member" not in r.text:
subscription_error = True
# If there's an error in subscription through the API, we report it to the main email address
if subscription_error:
send_email(
config["email"]["sender"],
"ERROR - Subscription to Hedy newsletter",
f"email: {email} status: {r.status_code} body: {r.text}",
f"<p>{email}</p><pre>Status:{r.status_code} Body:{r.text}</pre>")
return not subscription_error


def get_mailchimp_subscriber(email):
request_path = f'{MAILCHIMP_API_URL}/members/{get_subscriber_hash(email)}'
return requests.get(request_path, headers=MAILCHIMP_API_HEADERS, timeout=MAILCHIMP_TIMEOUT_SECONDS)


def update_mailchimp_tags(email, tags):
request_path = f'{MAILCHIMP_API_URL}/members/{get_subscriber_hash(email)}/tags'
return requests.post(
request_path,
headers=MAILCHIMP_API_HEADERS,
data=json.dumps({'tags': tags}),
timeout=MAILCHIMP_TIMEOUT_SECONDS
)


def delete_mailchimp_subscriber(email):
request_path = f'{MAILCHIMP_API_URL}/members/{get_subscriber_hash(email)}'
requests.delete(request_path, headers=MAILCHIMP_API_HEADERS, timeout=MAILCHIMP_TIMEOUT_SECONDS)


def get_subscriber_hash(email):
""" Hashes the email with md5 to avoid emails with unescaped characters triggering errors """
return hashlib.md5(email.encode("utf-8")).hexdigest()
Loading
Loading