Skip to content

Commit d5f77c8

Browse files
authored
Refactor regex patterns to use native RegExp (#3595)
Also fix the invalid "\{han}" escape (missing "p"), which is now "\p{Script=Han}".
1 parent f6350ed commit d5f77c8

4 files changed

Lines changed: 13 additions & 22 deletions

File tree

BibLaTeX.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
},
1818
"inRepository": true,
1919
"translatorType": 2,
20-
"lastUpdated": "2024-03-25 14:49:42"
20+
"lastUpdated": "2026-04-01 18:00:00"
2121
}
2222

2323
/*
@@ -302,9 +302,8 @@ function writeField(field, value, isMacro, noEscape) {
302302

303303
// Case of words with uppercase characters in non-initial positions is preserved with braces.
304304
// we're looking at all unicode letters
305-
var protectCaps = new ZU.XRegExp("\\b\\p{Letter}+\\p{Uppercase_Letter}\\p{Letter}*", 'g');
306305
if (field != "pages") {
307-
value = ZU.XRegExp.replace(value, protectCaps, "{$0}");
306+
value = value.replace(/\b\p{L}+\p{Lu}\p{L}*/gu, "{$&}");
308307
}
309308

310309
// Page ranges should use double dash

BibTeX.js

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
},
1919
"inRepository": true,
2020
"translatorType": 3,
21-
"lastUpdated": "2024-03-25 14:51:02"
21+
"lastUpdated": "2026-04-01 18:00:00"
2222
}
2323

2424
/*
@@ -1082,7 +1082,7 @@ function writeField(field, value, isMacro) {
10821082
value = escapeSpecialCharacters(value);
10831083

10841084
if (caseProtectedFields.includes(field)) {
1085-
value = ZU.XRegExp.replace(value, protectCapsRE, "$1{$2$3}"); // only $2 or $3 will have a value, not both
1085+
value = value.replace(protectCapsRE, "$1{$2$3}"); // only $2 or $3 will have a value, not both
10861086
}
10871087
}
10881088
var exportCharset = Zotero.getOption("exportCharset");
@@ -1341,15 +1341,12 @@ function doExport() {
13411341
// Case of words with uppercase characters in non-initial positions is
13421342
// preserved with braces.
13431343
// Two extra captures because of the other regexp below
1344-
protectCapsRE = new ZU.XRegExp("()()\\b([\\p{Letter}\\d]+\\p{Uppercase_Letter}[\\p{Letter}\\d]*)", 'g');
1344+
protectCapsRE = /()()\b([\p{L}\d]+\p{Lu}[\p{L}\d]*)/gu;
13451345
} else {
13461346
// Protect all upper case letters, even if the uppercase letter is only in
13471347
// initial position of the word.
13481348
// Don't protect first word if only first letter is capitalized
1349-
protectCapsRE = new ZU.XRegExp(
1350-
"(.)\\b([\\p{Letter}\\d]*\\p{Uppercase_Letter}[\\p{Letter}\\d]*)" // Non-initial words with capital letter anywhere
1351-
+ "|^([\\p{Letter}\\d]+\\p{Uppercase_Letter}[\\p{Letter}\\d]*)" // Initial word with capital in non-initial position
1352-
, 'g');
1349+
protectCapsRE = /(.)\b([\p{L}\d]*\p{Lu}[\p{L}\d]*)|^([\p{L}\d]+\p{Lu}[\p{L}\d]*)/gu;
13531350
}
13541351

13551352
//Zotero.write("% BibTeX export generated by Zotero "+Zotero.Utilities.getVersion());

Korean National Library.js

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsibv",
12-
"lastUpdated": "2022-12-28 02:37:11"
12+
"lastUpdated": "2026-04-01 18:00:00"
1313
}
1414

1515
/*
@@ -80,11 +80,10 @@ function getType(type) {
8080
function fixKoreanCreators(creators) {
8181
for (let i = 0; i < creators.length; i++) {
8282
var len = creators[i].lastName.length;
83-
var regex = "[\\p{hangul}\\{han}]{" + len + "}";
84-
var korean = new ZU.XRegExp(regex);
83+
var korean = new RegExp("^[\\p{Script=Hangul}\\p{Script=Han}]{" + len + "}$", 'u');
8584
if (creators[i].firstName) continue; // likely a Western name
8685
else if (len > 3) continue; // likely Japanese name
87-
else if (ZU.XRegExp.test(creators[i].lastName, korean)) {
86+
else if (korean.test(creators[i].lastName)) {
8887
// name is almost certainly Korean. First character is lastName
8988
creators[i].firstName = creators[i].lastName.replace(/^./, "");
9089
creators[i].lastName = creators[i].lastName.replace(/^(.).*/, "$1");

eLibrary.ru.js

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsibv",
12-
"lastUpdated": "2024-07-14 15:27:57"
12+
"lastUpdated": "2026-04-01 18:00:00"
1313
}
1414

1515
/*
@@ -170,13 +170,9 @@ async function scrape(doc, url = doc.location.href) {
170170
In all these cases, we put comma after LAST for `ZU.cleanAuthor()` to work.
171171
Other formats are rare, but possible, e.g. "ВАН ДЕ КЕРЧОВЕ Р." == "Van de Kerchove R.".
172172
They go to single-field mode (assuming they got no comma). */
173-
var nameFormat1RE = new ZU.XRegExp("^\\p{Letter}+\\s\\p{Letter}+\\s\\p{Letter}+$");
174-
var nameFormat2RE = new ZU.XRegExp("^\\p{Letter}+\\s\\p{Letter}\\.(\\s?\\p{Letter}\\.?)?$");
175-
var nameFormat3RE = new ZU.XRegExp("^\\p{Letter}+\\s\\(\\p{Letter}+\\)\\s\\p{Letter}+\\s\\p{Letter}+$");
176-
177-
var isFormat1 = ZU.XRegExp.test(dirty, nameFormat1RE);
178-
var isFormat2 = ZU.XRegExp.test(dirty, nameFormat2RE);
179-
var isFormat3 = ZU.XRegExp.test(dirty, nameFormat3RE);
173+
var isFormat1 = /^\p{L}+\s\p{L}+\s\p{L}+$/u.test(dirty);
174+
var isFormat2 = /^\p{L}+\s\p{L}\.(\s?\p{L}\.?)?$/u.test(dirty);
175+
var isFormat3 = /^\p{L}+\s\(\p{L}+\)\s\p{L}+\s\p{L}+$/u.test(dirty);
180176

181177
if (isFormat1 || isFormat2) {
182178
// add comma before the first space

0 commit comments

Comments
 (0)