diff --git a/Languages/en_US/General.php b/Languages/en_US/General.php
index 9c7d9460ea..dd58718e6f 100644
--- a/Languages/en_US/General.php
+++ b/Languages/en_US/General.php
@@ -12,8 +12,6 @@
//https://developers.google.com/recaptcha/docs/language
$txt['lang_recaptcha'] = 'en';
-// Ensure you remember to use uppercase for character set strings.
-$txt['lang_character_set'] = 'UTF-8';
// Character set right to left? 0 = ltr; 1 = rtl
$txt['lang_rtl'] = '0';
diff --git a/Languages/en_US/Install.php b/Languages/en_US/Install.php
index d2083d22b8..0626de28f6 100644
--- a/Languages/en_US/Install.php
+++ b/Languages/en_US/Install.php
@@ -2,8 +2,7 @@
// Version: 3.0 Alpha 2; Install
-// These should be the same as those in index.language.php.
-$txt['lang_character_set'] = 'UTF-8';
+// $txt['lang_rtl'] should be the same as the one in General.php.
$txt['lang_rtl'] = '0';
$txt['install_step_welcome'] = 'Welcome';
diff --git a/Sources/Actions/Admin/ACP.php b/Sources/Actions/Admin/ACP.php
index bf954485ae..69467c0308 100644
--- a/Sources/Actions/Admin/ACP.php
+++ b/Sources/Actions/Admin/ACP.php
@@ -1059,7 +1059,7 @@ public static function saveSettings(array &$config_vars): void
// Fix the darn stupid cookiename! (more may not be allowed, but these for sure!)
if (isset($_POST['cookiename'])) {
- $_POST['cookiename'] = preg_replace('~[,;\s\.$]+~' . (Utils::$context['utf8'] ? 'u' : ''), '', $_POST['cookiename']);
+ $_POST['cookiename'] = preg_replace('~[,;\s\.$]+~u', '', $_POST['cookiename']);
}
// Fix the forum's URL if necessary.
diff --git a/Sources/Actions/Admin/Languages.php b/Sources/Actions/Admin/Languages.php
index b879d2715f..2d617664aa 100644
--- a/Sources/Actions/Admin/Languages.php
+++ b/Sources/Actions/Admin/Languages.php
@@ -887,7 +887,11 @@ function ($val1, $val2) {
$replace_array = [];
foreach ($primary_settings as $setting => $type) {
- $replace_array['~\$txt\[\'' . $setting . '\'\]\s*=\s*[^\r\n]+~'] = '$txt[\'' . $setting . '\'] = ' . ($type === 'bool' ? (!empty($_POST[$setting]) ? 'true' : 'false') : '\'' . ($setting = 'native_name' ? htmlentities(Utils::htmlspecialcharsDecode($_POST[$setting]), ENT_QUOTES, Utils::$context['character_set']) : preg_replace('~[^\w-]~i', '', $_POST[$setting])) . '\'') . ';';
+ if ($setting === 'lang_character_set') { // Always UTF-8 now, so strip the obsolete line from the file.
+ $replace_array['/\$txt\[\'' . $setting . '\'\]\s*=\s*[^\r\n]+\R/u'] = '';
+ } else {
+ $replace_array['~\$txt\[\'' . $setting . '\'\]\s*=\s*[^\r\n]+~u'] = '$txt[\'' . $setting . '\'] = ' . ($type === 'bool' ? (!empty($_POST[$setting]) ? '\'1\'' : '\'0\'') : '\'' . ($setting === 'native_name' ? htmlentities(Utils::htmlspecialcharsDecode($_POST[$setting]), ENT_QUOTES, 'UTF-8') : preg_replace('~[^\w-]~i', '', $_POST[$setting])) . '\'') . ';';
+ }
}
$current_data = preg_replace(array_keys($replace_array), array_values($replace_array), $current_data);
@@ -910,6 +914,10 @@ function ($val1, $val2) {
Utils::$context['primary_settings']['name'] = Utils::ucwords(strtr($lang_id, ['_' => ' ', '-utf8' => '']));
foreach ($primary_settings as $setting => $type) {
+ if ($setting === 'lang_character_set') {
+ continue;
+ }
+
Utils::$context['primary_settings'][$setting] = [
'label' => str_replace('lang_', '', $setting),
'value' => $type === 'bool' ? !empty(Lang::$txt[$setting]) : Lang::$txt[$setting],
@@ -1005,12 +1013,12 @@ function ($val1, $val2) {
// Also, remove any lines for uneditable variables like $forum_copyright from the working data.
$entries = [];
- foreach (preg_split('~^(?=\$(?:' . implode('|', $string_types) . ')\[\'([^\n]+?)\'\])~m' . (Utils::$context['utf8'] ? 'u' : ''), preg_replace('~\s*\n(\$(?!(?:' . implode('|', $string_types) . '))[^\n]*)~', '', file_get_contents($current_file))) as $blob) {
+ foreach (preg_split('~^(?=\$(?:' . implode('|', $string_types) . ')\[\'([^\n]+?)\'\])~mu', preg_replace('~\s*\n(\$(?!(?:' . implode('|', $string_types) . '))[^\n]*)~', '', file_get_contents($current_file))) as $blob) {
// Comment lines at the end of the blob can make terrible messes
- $blob = preg_replace('~(\n[ \t]*//[^\n]*)*$~' . (Utils::$context['utf8'] ? 'u' : ''), '', $blob);
+ $blob = preg_replace('~(\n[ \t]*//[^\n]*)*$~u', '', $blob);
// Extract the variable
- if (preg_match('~^\$(' . implode('|', $string_types) . ')\[\'([^\n]+?)\'\](?:\[\'?([^\n]+?)\'?\])?\s?=\s?(.+);([ \t]*(?://[^\n]*)?)$~ms' . (Utils::$context['utf8'] ? 'u' : ''), strtr($blob, ["\r" => '']), $matches)) {
+ if (preg_match('~^\$(' . implode('|', $string_types) . ')\[\'([^\n]+?)\'\](?:\[\'?([^\n]+?)\'?\])?\s?=\s?(.+);([ \t]*(?://[^\n]*)?)$~msu', strtr($blob, ["\r" => '']), $matches)) {
// If no valid subkey was found, we need it to be explicitly null
$matches[3] = isset($matches[3]) && $matches[3] !== '' ? $matches[3] : null;
@@ -1093,7 +1101,7 @@ function ($val1, $val2) {
# Followed by a comma or the end of the string
(?=,|$)
- /x' . (Utils::$context['utf8'] ? 'u' : ''), $entryValue['entry'], $matches);
+ /xu', $entryValue['entry'], $matches);
if (empty($m)) {
continue;
@@ -1325,7 +1333,7 @@ function ($val1, $val2) {
// Apply our changes.
foreach ($final_saves as $save) {
if (!empty($save['is_regex'])) {
- $file_contents = preg_replace('~' . $save['find'] . '~' . (Utils::$context['utf8'] ? 'u' : ''), $save['replace'], $file_contents);
+ $file_contents = preg_replace('~' . $save['find'] . '~u', $save['replace'], $file_contents);
} else {
$file_contents = str_replace($save['find'], $save['replace'], $file_contents);
}
@@ -1567,7 +1575,7 @@ public static function list_getLanguages(): array
$languages[$lang['filename']] = [
'id' => $lang['filename'],
'count' => 0,
- 'char_set' => $txt['lang_character_set'],
+ 'char_set' => 'UTF-8',
'default' => Lang::$default == $lang['filename'] || (Lang::$default == '' && $lang['filename'] == 'en_US'),
'locale' => $txt['lang_locale'],
'name' => $lang['name'],
diff --git a/Sources/Actions/Admin/Maintenance.php b/Sources/Actions/Admin/Maintenance.php
index e8db19d2c2..c0f5414017 100644
--- a/Sources/Actions/Admin/Maintenance.php
+++ b/Sources/Actions/Admin/Maintenance.php
@@ -170,7 +170,7 @@ public function routine(): void
public function database(): void
{
// Show some conversion options?
- Utils::$context['convert_entities'] = isset(Config::$modSettings['global_character_set']) && Config::$modSettings['global_character_set'] === 'UTF-8';
+ Utils::$context['convert_entities'] = true;
if (Config::$db_type == 'mysql') {
$colData = Db::$db->list_columns('{db_prefix}messages', true);
@@ -1024,11 +1024,6 @@ public function entitiesToUnicode(): void
{
User::$me->isAllowedTo('admin_forum');
- // Check to see if UTF-8 is currently the default character set.
- if (Config::$modSettings['global_character_set'] !== 'UTF-8') {
- ErrorHandler::fatalLang('entity_convert_only_utf8');
- }
-
// Some starting values.
Utils::$context['table'] = empty($_REQUEST['table']) ? 0 : (int) $_REQUEST['table'];
Utils::$context['start'] = empty($_REQUEST['start']) ? 0 : (int) $_REQUEST['start'];
diff --git a/Sources/Actions/AttachmentDownload.php b/Sources/Actions/AttachmentDownload.php
index 9acc7d4450..492557a28b 100644
--- a/Sources/Actions/AttachmentDownload.php
+++ b/Sources/Actions/AttachmentDownload.php
@@ -23,7 +23,6 @@
use SMF\Config;
use SMF\Db\DatabaseApi as Db;
use SMF\IntegrationHook;
-use SMF\Lang;
use SMF\User;
use SMF\Utils;
@@ -320,15 +319,6 @@ public function execute(): void
*/
protected function __construct()
{
- // Some defaults that we need.
- if (!isset(Utils::$context['character_set'])) {
- Utils::$context['character_set'] = empty(Config::$modSettings['global_character_set']) ? (empty(Lang::$txt['lang_character_set']) ? 'ISO-8859-1' : Lang::$txt['lang_character_set']) : Config::$modSettings['global_character_set'];
- }
-
- if (!isset(Utils::$context['utf8'])) {
- Utils::$context['utf8'] = Utils::$context['character_set'] === 'UTF-8';
- }
-
// Which attachment was requested?
$this->id = $_REQUEST['attach'] = isset($_REQUEST['attach']) ? (int) $_REQUEST['attach'] : (int) (isset($_REQUEST['id']) ? (int) $_REQUEST['id'] : 0);
diff --git a/Sources/Actions/AttachmentUpload.php b/Sources/Actions/AttachmentUpload.php
index ad92aac40d..7de9db67e0 100644
--- a/Sources/Actions/AttachmentUpload.php
+++ b/Sources/Actions/AttachmentUpload.php
@@ -558,7 +558,7 @@ protected function sendResponse(): void
}
// Set the header.
- header('content-type: application/json; charset=' . Utils::$context['character_set'] . '');
+ header('content-type: application/json; charset=UTF-8');
echo Utils::jsonEncode($this->_response ? $this->_response : []);
diff --git a/Sources/Actions/Feed.php b/Sources/Actions/Feed.php
index 746798c604..9e7ee3ed6c 100644
--- a/Sources/Actions/Feed.php
+++ b/Sources/Actions/Feed.php
@@ -535,11 +535,11 @@ public function emit(): void
$filename[] = $this->format;
- $filename = preg_replace(Utils::$context['utf8'] ? '/[^\p{L}\p{M}\p{N}\-]+/u' : '/[\s_,.\/\\;:\'<>?|\[\]{}~!@#$%^&*()=+`]+/', '_', str_replace('"', '', Utils::htmlspecialcharsDecode(strip_tags(implode('-', $filename)))));
+ $filename = preg_replace('/[^\p{L}\p{M}\p{N}\-]+/u', '_', str_replace('"', '', Utils::htmlspecialcharsDecode(strip_tags(implode('-', $filename)))));
$file = [
'filename' => $filename . '.xml',
- 'mime_type' => self::MIME_TYPES[$this->format] . '; charset=' . (empty(Utils::$context['character_set']) ? 'UTF-8' : Utils::$context['character_set']),
+ 'mime_type' => self::MIME_TYPES[$this->format] . '; charset=UTF-8',
'content' => implode('', $this->xml),
'disposition' => isset($_GET['download']) ? 'attachment' : 'inline',
];
@@ -2765,7 +2765,7 @@ public static function build(string $format, array $data, array $metadata, strin
Utils::$context['feed'] = [];
// First, output the xml header.
- Utils::$context['feed']['header'] = '' . ($doctype !== '' ? "\n" . trim($doctype) : '');
+ Utils::$context['feed']['header'] = '<' . '?xml version="1.0" encoding="UTF-8"?' . '>' . ($doctype !== '' ? "\n" . trim($doctype) : '');
// Are we outputting an rss feed or one with more information?
if ($format == 'rss' || $format == 'rss2') {
diff --git a/Sources/Actions/Login2.php b/Sources/Actions/Login2.php
index 2503b9993b..410a68d3f0 100644
--- a/Sources/Actions/Login2.php
+++ b/Sources/Actions/Login2.php
@@ -519,7 +519,7 @@ protected function checkPasswordFallbacks(): bool
$other_passwords[] = sha1(strtolower(User::$profiles[User::$my_id]['member_name']) . Utils::htmlspecialcharsDecode($_POST['passwrd']));
// Perhaps we converted to UTF-8 and have a valid password being hashed differently.
- if (Utils::$context['character_set'] == 'UTF-8' && !empty(Config::$modSettings['previousCharacterSet']) && Config::$modSettings['previousCharacterSet'] != 'utf8') {
+ if (!empty(Config::$modSettings['previousCharacterSet']) && Config::$modSettings['previousCharacterSet'] != 'utf8') {
// Try iconv first, for no particular reason.
if (function_exists('iconv')) {
$other_passwords['iconv'] = sha1(strtolower(iconv('UTF-8', Config::$modSettings['previousCharacterSet'], User::$profiles[User::$my_id]['member_name'])) . Utils::htmlspecialcharsDecode(iconv('UTF-8', Config::$modSettings['previousCharacterSet'], $_POST['passwrd'])));
diff --git a/Sources/Actions/Memberlist.php b/Sources/Actions/Memberlist.php
index d9f2eb36a6..0698e52a01 100644
--- a/Sources/Actions/Memberlist.php
+++ b/Sources/Actions/Memberlist.php
@@ -293,7 +293,7 @@ public function all(): void
}
if (!is_numeric($_REQUEST['start'])) {
- if (preg_match('~^[^\'\\\\/]~' . (Utils::$context['utf8'] ? 'u' : ''), Utils::strtolower($_REQUEST['start']), $match) === 0) {
+ if (preg_match('~^[^\'\\\\/]~u', Utils::strtolower($_REQUEST['start']), $match) === 0) {
ErrorHandler::fatal('Are you a wannabe hacker?', false);
}
diff --git a/Sources/Actions/RequestMembers.php b/Sources/Actions/RequestMembers.php
index e30ef7a39f..81cfe5b099 100644
--- a/Sources/Actions/RequestMembers.php
+++ b/Sources/Actions/RequestMembers.php
@@ -60,9 +60,7 @@ public function execute(): void
{
User::$me->checkSession('get');
- if (Utils::$context['utf8'] || function_exists('mb_convert_encoding')) {
- header('content-type: text/plain; charset=UTF-8');
- }
+ header('content-type: text/plain; charset=UTF-8');
$request = Db::$db->query(
'',
@@ -81,15 +79,9 @@ public function execute(): void
);
while ($row = Db::$db->fetch_assoc($request)) {
- if (!Utils::$context['utf8']) {
- if (($temp = @mb_convert_encoding($row['real_name'], 'UTF-8', Utils::$context['character_set'])) !== false) {
- $row['real_name'] = $temp;
- }
- }
-
$row['real_name'] = strtr($row['real_name'], ['&' => '&', '<' => '<', '>' => '>', '"' => '"']);
- $row['real_name'] = Utils::entityDecode($row['real_name'], true);
+ $row['real_name'] = Utils::entityDecode($row['real_name']);
echo $row['real_name'], "\n";
}
diff --git a/Sources/Autolinker.php b/Sources/Autolinker.php
index b7a96dcd1e..c147a2e859 100644
--- a/Sources/Autolinker.php
+++ b/Sources/Autolinker.php
@@ -154,13 +154,6 @@ class Autolinker
* Internal properties
*********************/
- /**
- * @var string
- *
- * The character encoding being used.
- */
- protected string $encoding = 'UTF-8';
-
/**
* @var bool
*
@@ -258,20 +251,6 @@ public function __construct(bool $only_basic = false)
{
$this->only_basic = $only_basic;
- if (!empty(Utils::$context['utf8'])) {
- $this->encoding = 'UTF-8';
- } else {
- $this->encoding = !empty(Config::$modSettings['global_character_set']) ? Config::$modSettings['global_character_set'] : (!empty(Lang::$txt['lang_character_set']) ? Lang::$txt['lang_character_set'] : $this->encoding);
-
- if (in_array($this->encoding, mb_encoding_aliases('UTF-8'))) {
- $this->encoding = 'UTF-8';
- }
- }
-
- if ($this->encoding !== 'UTF-8') {
- self::$domain_label_chars = '0-9A-Za-z\-';
- }
-
// In case a mod wants to control behaviour for a special URI scheme.
if (!self::$integrate_autolinker_schemes_done) {
IntegrationHook::call('integrate_autolinker_schemes', [&self::$schemes]);
@@ -409,14 +388,14 @@ public function detectUrls(string $string, bool $plaintext_only = false): array
'((?' . '>' . '[^\[]|\[/?(?!' . $no_autolink_regex . ')' . '|(?1))*)' .
// 4 = Closing BBC markup element.
'(\[/\2\])' .
- '~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~iu',
fn($matches) => $matches[1] . str_repeat('x', strlen($matches[3])) . $matches[4],
$string,
);
// Overwrite all BBC markup elements.
$string = preg_replace_callback(
- '~\[/?' . Parser::getBBCodeTagsRegex() . '[^\]]*\]~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~\[/?' . Parser::getBBCodeTagsRegex() . '[^\]]*\]~iu',
fn($matches) => str_repeat(' ', strlen($matches[0])),
$string,
);
@@ -430,21 +409,21 @@ public function detectUrls(string $string, bool $plaintext_only = false): array
'((?' . '>' . '[^<]|?(?!a)' . '|(?1))*)' .
// 3 = Closing 'a' markup element.
'()' .
- '~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~iu',
fn($matches) => $matches[1] . str_repeat('x', strlen($matches[2])) . $matches[3],
$string,
);
// Overwrite all HTML elements.
$string = preg_replace_callback(
- '~?(\w+)\b([^>]*)>~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~?(\w+)\b([^>]*)>~iu',
fn($matches) => str_repeat(' ', strlen($matches[0])),
$string,
);
}
preg_match_all(
- '~' . $this->url_regex . '~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~' . $this->url_regex . '~iu',
$string,
$matches,
PREG_OFFSET_CAPTURE,
@@ -482,7 +461,7 @@ public function detectEmails(string $string, bool $plaintext_only = false): arra
$this->setEmailRegex();
preg_match_all(
- '~' . ($plaintext_only ? '(?:^|\s|
)\K' : '') . $this->email_regex . '~i' . ($this->encoding === 'UTF-8' ? 'u' : ''),
+ '~' . ($plaintext_only ? '(?:^|\s|
)\K' : '') . $this->email_regex . '~iu',
$string,
$matches,
PREG_OFFSET_CAPTURE,
@@ -834,7 +813,7 @@ protected function setTldRegex(): void
return;
}
- if (!$this->only_basic && $this->encoding === 'UTF-8') {
+ if (!$this->only_basic) {
Url::setTldRegex();
$this->tld_regex = Config::$modSettings['tld_regex'];
} else {
diff --git a/Sources/Config.php b/Sources/Config.php
index f04b024433..07153ec742 100644
--- a/Sources/Config.php
+++ b/Sources/Config.php
@@ -1130,9 +1130,7 @@ public static function reloadModSettings(): void
self::updateModSettings(['forum_uuid' => Uuid::getNamespace()]);
}
- // Here to justify the name of this function. :P
- // It should be added to the install and upgrade scripts.
- // But since the converters need to be updated also. This is easier.
+ // Ensure the attachment upload directory settings are valid.
if (empty(self::$modSettings['currentAttachmentUploadDir'])) {
self::updateModSettings([
'attachmentUploadDir' => Utils::jsonEncode([1 => self::$modSettings['attachmentUploadDir']]),
@@ -1147,6 +1145,11 @@ public static function reloadModSettings(): void
self::$modSettings['attachmentSizeLimit'] = empty(self::$modSettings['attachmentSizeLimit']) ? $file_max_kb : min(self::$modSettings['attachmentSizeLimit'], $file_max_kb);
self::$modSettings['attachmentNumPerPostLimit'] = !isset(self::$modSettings['attachmentNumPerPostLimit']) ? 4 : self::$modSettings['attachmentNumPerPostLimit'];
+ // global_character_set is deprecated (always UTF-8 now), but some old mods might still read it.
+ if (!empty(self::$backward_compatibility)) {
+ self::$modSettings['global_character_set'] = 'UTF-8';
+ }
+
// Integration is cool.
if (defined('SMF_INTEGRATION_SETTINGS')) {
$integration_settings = Utils::jsonDecode(SMF_INTEGRATION_SETTINGS, true);
diff --git a/Sources/Draft.php b/Sources/Draft.php
index b586ec35b1..33e090d044 100644
--- a/Sources/Draft.php
+++ b/Sources/Draft.php
@@ -801,9 +801,9 @@ protected static function xml(int $id_draft): void
{
Lang::load('Drafts');
- header('content-type: text/xml; charset=' . (empty(Utils::$context['character_set']) ? 'ISO-8859-1' : Utils::$context['character_set']));
+ header('content-type: text/xml; charset=UTF-8');
- echo '
+ echo '<' . '?xml version="1.0" encoding="UTF-8"?' . '>