Skip to content

Commit

Permalink
Handle more edge-cases
Browse files Browse the repository at this point in the history
Closes #58
  • Loading branch information
sindresorhus committed Jun 30, 2024
1 parent 94ad7e1 commit be33439
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 1 deletion.
33 changes: 32 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import emojiRegex from 'emoji-regex';

// Shared `Intl.Segmenter` instance, created once at module load with default options
// (the default granularity of `Intl.Segmenter` is "grapheme").
const segmenter = new Intl.Segmenter();

// Matches a string that consists of exactly one code point carrying the Unicode
// `Default_Ignorable_Code_Point` property (the `u` flag enables `\p{…}` escapes).
// `stringWidth` tests `character` against it and skips the character on a match.
const defaultIgnorableCodePointRegex = /^\p{Default_Ignorable_Code_Point}$/u;

export default function stringWidth(string, options = {}) {
if (typeof string !== 'string' || string.length === 0) {
return 0;
Expand Down Expand Up @@ -33,8 +35,37 @@ export default function stringWidth(string, options = {}) {
continue;
}

// Ignore zero-width characters
if (
(codePoint >= 0x20_0B && codePoint <= 0x20_0F) // Zero-width space, non-joiner, joiner, left-to-right mark, right-to-left mark
|| codePoint === 0xFE_FF // Zero-width no-break space
) {
continue;
}

// Ignore combining characters
if (codePoint >= 0x3_00 && codePoint <= 0x3_6F) {
if (
(codePoint >= 0x3_00 && codePoint <= 0x3_6F) // Combining diacritical marks
|| (codePoint >= 0x1A_B0 && codePoint <= 0x1A_FF) // Combining diacritical marks extended
|| (codePoint >= 0x1D_C0 && codePoint <= 0x1D_FF) // Combining diacritical marks supplement
|| (codePoint >= 0x20_D0 && codePoint <= 0x20_FF) // Combining diacritical marks for symbols
|| (codePoint >= 0xFE_20 && codePoint <= 0xFE_2F) // Combining half marks
) {
continue;
}

// Ignore surrogate pairs
if (codePoint >= 0xD8_00 && codePoint <= 0xDF_FF) {
continue;
}

// Ignore variation selectors
if (codePoint >= 0xFE_00 && codePoint <= 0xFE_0F) {
continue;
}

// This covers some of the above cases, but we still keep them for performance reasons.
if (defaultIgnorableCodePointRegex.test(character)) {
continue;
}

Expand Down
53 changes: 53 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ test('ignores control characters', t => {

// Asserts that combining diacritical marks contribute no width, whether they
// follow a base character or appear on their own.
test('handles combining characters', t => {
	const cases = [
		['x\u0300', 1],
		['\u0300\u0301', 0],
		['e\u0301e', 2],
		['x\u036F', 1],
		['\u036F\u036F', 0],
	];

	for (const [input, expectedWidth] of cases) {
		t.is(stringWidth(input), expectedWidth);
	}
});

test('handles ZWJ characters', t => {
Expand All @@ -45,3 +49,52 @@ test('handles ZWJ characters', t => {
t.is(stringWidth('👩‍👩‍👦‍👦'), 2);
t.is(stringWidth('👨‍❤️‍💋‍👨'), 2);
});

// Asserts that each zero-width character has width 0 on its own and adds
// nothing when placed between two ordinary characters.
test('handles zero-width characters', t => {
	const zeroWidthCharacters = [
		'\u200B',
		'\u200C',
		'\u200D',
		'\uFEFF',
	];

	for (const zeroWidth of zeroWidthCharacters) {
		t.is(stringWidth(zeroWidth), 0);
		t.is(stringWidth(`x${zeroWidth}x`), 2);
	}
});

// Asserts that a well-formed surrogate pair is measured as the single
// astral character it encodes, alone and between ASCII letters.
test('handles surrogate pairs', t => {
	const cases = [
		['\uD83D\uDE00', 2], // 😀
		['A\uD83D\uDE00B', 4],
	];

	for (const [input, expectedWidth] of cases) {
		t.is(stringWidth(input), expectedWidth);
	}
});

// Asserts that a variation selector (U+FE0F) adds no width to the character
// it follows and has width 0 when it stands alone.
test('handles variation selectors', t => {
	const cases = [
		['\u{1F1E6}\uFE0F', 1], // Regional indicator symbol A with variation selector
		['A\uFE0F', 1],
		['\uFE0F', 0],
	];

	for (const [input, expectedWidth] of cases) {
		t.is(stringWidth(input), expectedWidth);
	}
});

// Mixed bag of boundary inputs: the empty string, repeated zero-width and
// combining characters, emoji sequences, and each extra combining-mark block.
test('handles edge cases', t => {
	const cases = [
		['', 0],
		['\u200B\u200B', 0],
		['x\u200Bx\u200B', 2],
		['x\u0300x\u0300', 2],
		['\uD83D\uDE00\uFE0F', 2], // 😀 with variation selector
		['\uD83D\uDC69\u200D\uD83C\uDF93', 2], // 👩‍🎓
		['x\u1AB0x\u1AB0', 2], // Combining diacritical marks extended
		['x\u1DC0x\u1DC0', 2], // Combining diacritical marks supplement
		['x\u20D0x\u20D0', 2], // Combining diacritical marks for symbols
		['x\uFE20x\uFE20', 2], // Combining half marks
	];

	for (const [input, expectedWidth] of cases) {
		t.is(stringWidth(input), expectedWidth);
	}
});

// Asserts that invisible operators and joiners have width 0 on their own and
// add nothing when placed between two ordinary characters.
test('ignores default ignorable code points', t => {
	const invisibleCharacters = [
		'\u2060', // Word joiner
		'\u2061', // Function application
		'\u2062', // Invisible times
		'\u2063', // Invisible separator
		'\u2064', // Invisible plus
	];

	const standaloneCharacters = [
		...invisibleCharacters,
		'\uFEFF', // Zero-width no-break space
	];

	for (const character of standaloneCharacters) {
		t.is(stringWidth(character), 0);
	}

	for (const character of invisibleCharacters) {
		t.is(stringWidth(`x${character}x`), 2);
	}
});

0 comments on commit be33439

Please sign in to comment.