// Latin-to-Cyrillic mapping
// Keys are Latin characters, values are the Cyrillic text that convertScript substitutes for them.
// Many pairs render identically, so edit this table with care (compare code points, not glyphs).
// NOTE(review): some values near the end (the acute-accented vowels) look like a base letter plus a
// combining mark, i.e. longer than one code unit -- confirm; the inversion below skips such values.
var Lat2CyrMap = {
'a':'а', 'A':'А', 'ă':'ӑ', 'Ă':'Ӑ', 'ä':'ӓ', 'Ä':'Ӓ', 'æ':'ӕ', 'Æ':'Ӕ', 'B':'В', 'c':'с', 'C':'С', 'ç':'ҫ', 'Ç':'Ҫ', 'e':'е', 'E':'Е', 'è':'ѐ', 'È':'Ѐ', 'ë':'ё', 'Ë':'Ё', 'ĕ':'ӗ', 'Ĕ':'Ӗ', 'ə':'ә', 'Ə':'Ә', 'H':'Н', 'i':'і', 'I':'І', 'ï':'ї', 'Ï':'Ї', 'j':'ј', 'J':'Ј', 'k':'к', 'K':'К', 'M':'М', 'o':'о', 'O':'О', 'ö':'ӧ', 'Ö':'Ӧ', 'p':'р', 'P':'Р', 'Q':'Ԛ', 's':'ѕ', 'S':'Ѕ', 'T':'Т', 'W':'Ԝ', 'x':'х', 'X':'Х', 'y':'у', 'Y':'У', 'ȳ':'ӯ', 'ÿ':'ӱ', 'á':'а́', 'é':'е́', 'í':'і́', 'ó':'о́', 'ý':'у́', 'ħ':'ћ', 'ɜ':'з' };
// Cyrillic-to-Latin substitutions used for the "fix-enc" (encoding error) repair option.
var EncErrMap = {'ц':'ö', 'ч':'ç', 'у':'ã', 'б':'á', 'ж':'æ'};
// Cyrillic-to-Latin lookup; starts empty and is populated by the invertAndLengthFilter call below.
var Cyr2LatMap = {};
// invert Lat2CyrMap to Cyr2LatMap and strip keys of length > 1 in both directions
// (invertAndLengthFilter is a function declaration further down the file, so it is hoisted and callable here)
invertAndLengthFilter(Lat2CyrMap, Cyr2LatMap);
// define patterns and regexes for matching all chars in script, or just homoglyphs
//
// The *Pat strings are character-class BODIES (no surrounding brackets) so that they can be
// concatenated into larger classes; the *Regex objects wrap them in "[" ... "]".
//   - *AllRegex:       one or more consecutive letters of the script
//   - *OneRegex:       exactly one letter of the script (used to count/classify single characters)
//   - *HomoglyphRegex: one or more consecutive letters that have a look-alike in the other script
//
// NOTE(review): the bracket expressions were missing from the copy this was restored from
// (new RegExp("+", "g") throws "Nothing to repeat"); they are reconstructed from the variable
// names and from how the regexes are used elsewhere in this file. Confirm against upstream.
var LatAllPat = 'A-Za-zÀ-ɏɐ-ʯ';
var LatHomoglyphPat = Object.keys(Lat2CyrMap).join('');
var LatAllRegex = new RegExp("[" + LatAllPat + "]+", "g");
var LatOneRegex = new RegExp("[" + LatAllPat + "]", "g");
var LatHomoglyphRegex = new RegExp("[" + LatHomoglyphPat + "]+", "g");
var CyrAllPat = 'Ѐ-ԯ';
var CyrHomoglyphPat = Object.keys(Cyr2LatMap).join('');
var CyrAllRegex = new RegExp("[" + CyrAllPat + "]+", "g");
var CyrOneRegex = new RegExp("[" + CyrAllPat + "]", "g");
var CyrHomoglyphRegex = new RegExp("[" + CyrHomoglyphPat + "]+", "g");
// Regex handed to the search backend via "insource:/.../": a run of Latin/Cyrillic letters that
// contains at least one Latin letter directly next to a Cyrillic letter (in either order).
// NOTE(review): only the skeleton "/X*(AB|CD)Y*/" survived; the classes below are a best guess.
// Any-Cyrillic (rather than homoglyph-only) was chosen because the encoding-error repair targets
// Cyrillic letters that are not homoglyphs -- confirm against upstream.
var insourcePat = "/[" + LatAllPat + CyrAllPat + "]*([" + LatAllPat + "][" + CyrAllPat + "]|[" + CyrAllPat + "][" + LatAllPat + "])[" + LatAllPat + CyrAllPat + "]*/";
// Config
// These stay `var` on purpose: the inline onclick handlers in the markup below read and
// write them by bare name.
var viceversa = 1;      // 1 = also list predominantly Cyrillic words, 0 = skip them
var sortbyscore = 1;    // 1 = order results by "magic score", 0 = by raw result count
var limitresults = 50;  // not referenced by the code visible in this file
var slowFetch = 0;      // busy flag: non-zero while a long-running search is in flight
var startTime = '';     // server timestamp of the latest content fetch; sent as starttimestamp on edit
// Initial content of the status area: the option toggles plus the start link.
var letsGo = [
    "<b>Options:</b><br>",
    "<ul><li>Looking for <b>Latin</b> words with <i>Cyrillic</i> characters.</li>",
    "<li><a href='#' onclick='viceversa=1-viceversa; $(\"#FHOptViceVersa\").html(viceversa?\"Also show\":\"Skip\");'>Vice Versa</a>: <b><div style='display:inline' id='FHOptViceVersa'></div></b> predominantly Cyrillic words.</li>",
    "<li><a href='#' onclick='sortbyscore=1-sortbyscore; $(\"#FHOptSort\").html(sortbyscore?\"magic score\":\"raw results count\");'>Sort</a>: Sort by <b><div style='display:inline' id='FHOptSort'></div></b>. (Magic score puts impactful, more obviously correctable results first.)</li>",
    "</ul><br>",
    "<a href='#' onclick='findHomoglyphs();'>Let's go</a>!"
].join("");
/**
 * Show the Homoglyph Hunter overlay.
 *
 * Creates the fixed-position #HHContainer element the first time it is called, then
 * (on every call) makes it visible, resets its markup to the start screen and fills in
 * the current option labels.
 */
function initialize_HHunter() {
    // Build the overlay only once; later calls reuse the element that is already in the page.
    if ($("#HHContainer").length === 0) {
        const overlay = document.createElement('div');
        overlay.setAttribute('id', 'HHContainer');
        Object.assign(overlay.style, {
            position: 'fixed',
            width: "90%",
            height: "90%",
            top: "3%",
            left: "5%",
            margin: "0",
            zIndex: "1000000",
            backgroundColor: "#fefefe",
            border: "1px solid #aaa",
            overflow: "scroll",
            display: "none"
        });
        document.body.append(overlay);
    }

    const $container = $('#HHContainer');
    $container.css('display','inline');
    $container.html("<div style='padding:0.75em; direction:ltr' id='HHContent'> <div style='float:right; margin:0; padding:0; font-family:sans-serif; cursor:pointer; color:#999; text-align:center; padding:1px' onclick='closeHH();'>ⓧ</div> <h4 style='text-align:center'>Homoglyph Hunter</h4> <div id=HHStatus>" + letsGo + "</div> <div id=HHMixedWords></div><br><br><div id=HHSnippets></div> </div><br><br><br><br><br><br>");

    // The option placeholders inside letsGo start out empty; show the current settings.
    const viceVersaLabel = viceversa ? "Also show" : "Skip";
    const sortLabel = sortbyscore ? "magic score" : "raw results count";
    $("#FHOptViceVersa").html(viceVersaLabel);
    $("#FHOptSort").html(sortLabel);
}
/**
 * Hide the Homoglyph Hunter overlay (the element stays in the DOM and keeps its content).
 */
function closeHH() {
    const $overlay = $('#HHContainer');
    $overlay.css('display','none');
}
/**
 * Fetch the wikitext of one page and append to #HHSnippets every line fragment that contains
 * the mixed-script word, together with the fix / open / edit links for that page.
 *
 * Side effect: stores the server timestamp of the fetch in the global `startTime`, which
 * fixHHArticle later sends as `starttimestamp`.
 *
 * NOTE(review): the array/property indexing in this function (pages[prop], revisions[0]['*'],
 * myMatches[i]), the "&lt;" escape and the character classes of the yellow-highlight regex were
 * missing from the copy this was restored from. Indexing follows the legacy (formatversion=1)
 * query response layout; the highlight regex is a best-effort reconstruction -- confirm upstream.
 *
 * @param {string} mixedWord - the mixed-script word; interpolated unescaped into RegExp sources
 *                             and into inline onclick handlers, so it is assumed to be letters only
 * @param {string} theTitle  - title of the page to fetch
 */
function getHHSnippets (mixedWord, theTitle) {
    var contentRequest = new mw.Api().get( {
        action: 'query',
        prop: 'revisions',
        titles: theTitle,
        rvprop: 'content',
        format: 'json',
        curtimestamp: '1'
    } );

    // Build one "(fix-xxx: word)" link; direction is the 4th argument handed to fixHHArticle.
    function fixLink(color, direction, label, converted) {
        return '(<a style="color:' + color + '" href=# onclick=\'fixHHArticle(this, "' + mixedWord + '","' + quoteEsc(theTitle) + '", ' + direction + ')\'><b>' + label + ':</b> ' + colorizeString(converted) + '</a>) ';
    }

    $.when( contentRequest ).then(function(article) {
        var pages = article.query.pages;
        var resultHTML = '';
        startTime = article.curtimestamp;

        // Only one title was requested, so take the first (only) entry of the pages map.
        var page;
        for (var prop in pages) {
            if (Object.prototype.hasOwnProperty.call(pages, prop)) {
                page = pages[prop];
                break;
            }
        }
        // Missing/deleted pages come back without a revisions array; nothing to show then.
        if (!page || !page.revisions || page.revisions.length === 0) {
            return;
        }

        var articleText = page.revisions[0]['*'];
        // "." does not cross newlines, so each snippet is at most 75 chars either side on one line.
        var contextRegex = new RegExp(".{0,75}" + mixedWord + ".{0,75}", "g");
        var myMatches = articleText.match(contextRegex);
        if (!myMatches) {
            return;
        }

        var mixedWordRegex = new RegExp (mixedWord, "g");
        var displayTitle = theTitle.replace(mixedWordRegex, colorizeString(mixedWord));
        var latVersion = convertScript(mixedWord, Cyr2LatMap);
        var cyrVersion = convertScript(mixedWord, Lat2CyrMap);
        var encVersion = convertScript(mixedWord, EncErrMap);

        resultHTML += '<font size=-1>';
        // Offer a fix only when the conversion would actually change the word.
        if (latVersion != mixedWord) {
            resultHTML += fixLink('blue', 1, 'fix-latn', latVersion);
        }
        if (cyrVersion != mixedWord) {
            resultHTML += fixLink('red', 2, 'fix-cyrl', cyrVersion);
        }
        if (encVersion != mixedWord) {
            resultHTML += fixLink('black', 3, 'fix-enc', encVersion);
        }
        // NOTE(review): the absolute host in the "open" link looks out of place next to the
        // relative "edit" link -- confirm it is intended.
        resultHTML += '(<a href="https://wiki386.com/en/' + quoteEsc(theTitle) + '" target=_blank>open</a>) (<a href="/w/index.php?title=' + quoteEsc(theTitle) + '&action=edit" target=_blank>edit</a>)</font> <b>' + displayTitle + '</b> <ol>';

        for (var i = 0; i < myMatches.length; i++) {
            // Neutralise markup from the article before it is inserted as HTML.
            var display = myMatches[i].replace(/</g, "&lt;");
            // Yellow: spots where blindly changing text is risky -- wikilink targets, "name =" style
            // parameters, (escaped) tags, URLs and media file extensions.
            display = display.replace(/\[\[[^\]|]+\]?\]?|([\w-]+\s*=)|(&lt;[^>]*>?)|https?:\/\/[^\s]*|(\.(jpe?g|gif|png|svg|tiff|xcf|mp3|mid|ogg|flac|wav|djvu?|pdf|tab))/ig, "<span style='background-color:#FFFF99;'>$&</span>");
            // Green: the mixed-script word itself.
            display = display.replace(mixedWordRegex, "<span style='background-color:#CFC'>$&</span>");
            resultHTML += '<li style="font-family:monospace">...' + display + '...</li>';
        }
        resultHTML += '</ol><br>';
        $('#HHSnippets').append(resultHTML);
    });
    return;
}
/**
 * First stage of the per-word lookup: search page TITLES for the word, list snippets for each
 * hit, then hand over to getHHTemplates.
 *
 * Takes the global `slowFetch` lock; it is released on failure here.
 *
 * Fixes relative to the damaged original: the per-result index (searches[i]) is restored,
 * `searches` is a local instead of an implicit global shared between concurrent callbacks,
 * and a failed request no longer leaves `slowFetch` set forever.
 *
 * @param {string} target - the mixed-script word to look for
 */
function getHHTitles( target ) {
    if (slowFetch) {
        return;
    }
    slowFetch = 1;
    // Words longer than two characters are searched as an intitle regex (/word/), shorter ones literally.
    var title_target = target;
    if (target.length > 2) {
        title_target='/' + target + '/';
    }
    $('#HHSnippets').html('<i>Be careful changing text in links!</i><br><br>');
    var titlesearch = new mw.Api().get( {
        action: 'query',
        list: 'search',
        format: 'json',
        srlimit: '50',
        srsearch: 'intitle:' + title_target
    } ).fail( function( code, result ) {
        // The chain stops here, so release the lock or every later click would be ignored.
        slowFetch = 0;
        if ( code === "http" ) {
            alert( "HTTP error: " + result.textStatus ); // result.xhr contains the jqXHR object
        } else if ( code === "ok-but-empty" ) {
            alert( "Error: Got an empty response from the server" );
        } else {
            alert( "API error: " + code );
        }
        return;
    } );
    $.when( titlesearch ).then(function(results) {
        var searches = results.query.search;
        if (searches.length !== 0) {
            $('#HHSnippets').append('<h4>Titles (' + searches.length + ') for ' + colorizeString(target) + '</h4>');
            for (var i = 0; i < searches.length; i++) {
                getHHSnippets(target, searches[i].title);
            }
        }
        // Continue with the next stage whether or not any title matched.
        getHHTemplates(target);
    });
    return;
}
/**
 * Second stage of the per-word lookup: run a `template:"word"` search, list snippets for each
 * hit, then hand over to getHHFullText.
 *
 * Fixes relative to the damaged original: the per-result index (searches[i]) is restored,
 * `searches` is a local instead of an implicit global, and a failed request releases the
 * global `slowFetch` lock instead of leaving it set forever.
 *
 * @param {string} target - the mixed-script word to look for
 */
function getHHTemplates( target ) {
    slowFetch = 1;
    var templatesearch = new mw.Api().get( {
        action: 'query',
        list: 'search',
        format: 'json',
        srlimit: '50',
        srsearch: 'template:"' + target + '"'
    } ).fail( function( code, result ) {
        // The chain stops here, so release the lock or every later click would be ignored.
        slowFetch = 0;
        if ( code === "http" ) {
            alert( "HTTP error: " + result.textStatus ); // result.xhr contains the jqXHR object
        } else if ( code === "ok-but-empty" ) {
            alert( "Error: Got an empty response from the server" );
        } else {
            alert( "API error: " + code );
        }
        return;
    } );
    $.when( templatesearch ).then(function(results) {
        var searches = results.query.search;
        if (searches.length !== 0) {
            $('#HHSnippets').append('<h4>Templates (' + searches.length + ') for ' + colorizeString(target) + '</h4>');
            for (var i = 0; i < searches.length; i++) {
                getHHSnippets(target, searches[i].title);
            }
        }
        // Continue with the final stage whether or not anything matched.
        getHHFullText(target);
    });
    return;
}
/**
 * Final stage of the per-word lookup: run an `insource:word` search, list snippets for each
 * hit, and release the global `slowFetch` lock.
 *
 * Fixes relative to the damaged original: the per-result index (searches[i]) is restored,
 * `searches` is a local instead of an implicit global, and the lock is released on failure
 * as well as on success.
 *
 * @param {string} target - the mixed-script word to look for
 */
function getHHFullText( target ) {
    slowFetch = 1;
    var fulltextsearch = new mw.Api().get( {
        action: 'query',
        list: 'search',
        format: 'json',
        srlimit: '50',
        srsearch: 'insource:' + target
    } ).fail( function( code, result ) {
        // Release the lock on failure too, otherwise every later click would be ignored.
        slowFetch = 0;
        if ( code === "http" ) {
            alert( "HTTP error: " + result.textStatus ); // result.xhr contains the jqXHR object
        } else if ( code === "ok-but-empty" ) {
            alert( "Error: Got an empty response from the server" );
        } else {
            alert( "API error: " + code );
        }
        return;
    } );
    $.when( fulltextsearch ).then(function(results) {
        var searches = results.query.search;
        if (searches.length !== 0) {
            $('#HHSnippets').append('<h4>Full-Text Results (' + searches.length + ') for ' + colorizeString(target) + '</h4>');
            for (var i = 0; i < searches.length; i++) {
                getHHSnippets(target, searches[i].title);
            }
        }
        slowFetch = 0;
    });
    return;
}
/**
 * Apply one of the offered fixes to a page: fetch its current wikitext, replace every
 * occurrence of the mixed-script word with the converted form, and save it as a minor edit.
 *
 * The clicked link is disabled and hidden immediately so the same fix cannot be sent twice;
 * a bold FIXED / ERROR marker is inserted after it when the request finishes.
 *
 * NOTE(review): `prop`, the pageids/pages/revisions indexing and the "[$&]" replacement
 * strings were missing from the copy this was restored from. They are reconstructed from the
 * legacy (formatversion=1) query response layout and from the edit-summary wording -- confirm
 * against upstream.
 *
 * @param {Element} linkElem - the clicked fix link
 * @param {string} mixedWord - the word to replace; used unescaped as a RegExp source
 * @param {string} theTitle  - page title as produced by quoteEsc (quotes percent-encoded)
 * @param {number} direction - 3 = encoding-error fix, 2 = Latin to Cyrillic, anything else = Cyrillic to Latin
 */
function fixHHArticle( linkElem, mixedWord, theTitle, direction ) {
    theTitle = quoteUnesc(theTitle);
    $(linkElem).attr('onclick','');
    $(linkElem).css('display', 'none');

    // Shared failure path for both the fetch and the save request.
    function reportFailure( code, result ) {
        if ( code === "http" ) {
            alert( "HTTP error: " + result.textStatus ); // result.xhr contains the jqXHR object
        } else if ( code === "ok-but-empty" ) {
            alert( "Error: Got an empty response from the server" );
        } else {
            alert( "API error: " + code );
        }
        $(linkElem).after("<b style='font-size:80%'>ERROR</b>");
    }

    //Get content of article
    new mw.Api().get( {
        action: 'query',
        titles: theTitle,
        prop: [ 'revisions' ],
        rvprop: 'content',
        indexpageids: 1,
        rawcontinue: ''
    } ).done( function( result ) {
        // indexpageids gives us the key under which the single requested page is stored.
        var artID = result.query.pageids[0];
        var page = result.query.pages[artID];
        // A missing/deleted page has no revisions; report it instead of throwing inside the callback.
        if (!page || !page.revisions || page.revisions.length === 0) {
            reportFailure( "missingtitle" );
            return;
        }
        var artContents = page.revisions[0]['*'];
        var mixedWordRegex = new RegExp (mixedWord, "g");
        var displayMixedWord = '';
        var fixMsg = '';
        if (direction == 3) {
            // Encoding Error
            var reEncoded = convertScript(mixedWord, EncErrMap);
            artContents = artContents.replace(mixedWordRegex, reEncoded );
            fixMsg = 'fix encoding error: ' + mixedWord + ' → ' + reEncoded;
        }
        else if (direction == 2) {
            // Latin to Cyrillic
            artContents = artContents.replace(mixedWordRegex, convertScript(mixedWord, Lat2CyrMap) );
            // Bracket the characters being converted so the edit summary shows which ones changed.
            displayMixedWord = mixedWord.replace(LatHomoglyphRegex, "[$&]");
            fixMsg = 'fix homoglyphs: convert Latin characters in ' + displayMixedWord + ' to Cyrillic';
        }
        else {
            // Cyrillic to Latin
            artContents = artContents.replace(mixedWordRegex, convertScript(mixedWord, Cyr2LatMap) );
            displayMixedWord = mixedWord.replace(CyrHomoglyphRegex, "[$&]");
            fixMsg = 'fix homoglyphs: convert Cyrillic characters in ' + displayMixedWord + ' to Latin';
        }
        new mw.Api().postWithToken( 'edit', {
            action: 'edit',
            title: theTitle,
            text: artContents,
            summary: fixMsg,
            minor: '1',
            starttimestamp: startTime
        } ).done( function() {
            $(linkElem).after("<b style='font-size:80%'>FIXED</b>");
        } ).fail( reportFailure );
    } ).fail( reportFailure );
}
/**
 * Main search: ask the search backend for everything matching `insourcePat`, collect the
 * distinct highlighted words from the result snippets, look up how many pages contain each
 * word, and render the sorted word list into #HHMixedWords.
 *
 * Per word, `matches[word]` is the fraction of its characters that are Latin (or, for mostly
 * Cyrillic words when both `viceversa` and `sortbyscore` are on, the Cyrillic fraction).
 *
 * Fixes relative to the damaged original:
 *  - all array/object indexing (search[i], m[1], terms[i], results[i][0], ...) is restored;
 *  - the "Nothing found" early return and request failures now release `slowFetch` and clear
 *    the "Fetching data" status instead of leaving the tool locked;
 *  - with exactly one word, $.when passes the callback arguments of that single promise
 *    directly instead of one array per promise; that case is normalised.
 *
 * NOTE(review): the sort comparators lost their operands; descending order (largest first) is
 * assumed from the "puts impactful ... results first" wording in the options text -- confirm.
 */
function findHomoglyphs() {
    if (slowFetch) {
        return;
    }
    slowFetch = 1;
    $('#HHStatus').html("<b>Fetching data... this can take 30 seconds or more.</b>");

    // Shared failure path: unlock, clear the status line, and tell the user.
    function reportFailure( code, result ) {
        slowFetch = 0;
        $('#HHStatus').html('');
        if ( code === "http" ) {
            alert( "HTTP error: " + result.textStatus ); // result.xhr contains the jqXHR object
        } else if ( code === "ok-but-empty" ) {
            alert( "Error: Got an empty response from the server" );
        } else {
            alert( "API error: " + code );
        }
    }

    var regexSearch = new mw.Api().get( {
        action: 'query',
        list: 'search',
        format: 'json',
        srlimit: '10000',
        srsearch: 'insource:' + insourcePat
    } ).fail( reportFailure );

    $.when( regexSearch ).then(function(x) {
        var matches = {};
        var re = /<span class="searchmatch">(.*?)<\/span>/g;
        var m;
        var i;
        if (x.query.search.length == 0) {
            $('#HHStatus').html('');
            $('#HHMixedWords').html("Nothing found.");
            slowFetch = 0;
            return;
        }
        for (i = 0; i < x.query.search.length; i++) {
            var snip = x.query.search[i].snippet;
            // exec() on a /g regex walks through the snippet and resets itself once it returns null.
            while ((m = re.exec(snip)) !== null) {
                var word = m[1];
                if (word.length === 0 || typeof matches[word] != 'undefined') {
                    continue;
                }
                matches[word] = (word.match(LatOneRegex) || []).length / word.length;
                if (viceversa == 1 && sortbyscore == 1 && matches[word] < 0.5) {
                    matches[word] = (word.match(CyrOneRegex) || []).length / word.length;
                }
            }
        }
        var terms = Object.keys(matches).sort(function(a, b) {
            return matches[b] - matches[a];
        });

        // One cheap search per word (srlimit 1) just to read its total hit count.
        var artCountPromises = [];
        var mwapi = new mw.Api();
        for (i = 0; i < terms.length; i++) {
            artCountPromises.push( mwapi.get( { action: 'query', list: 'search', format: 'json', srlimit: '1', srsearch: 'insource:' + terms[i] } ) );
        }
        var count = {};
        var score = {};
        $.when ( ...artCountPromises ).then(function() {
            // Multiple promises: one [data, jqXHR] array per promise. Single promise: the
            // arguments ARE that one promise's (data, jqXHR), so wrap them to get the same shape.
            var results = (terms.length === 1) ? [ arguments ] : arguments;
            var resultHTML = '';
            var i;
            for (i = 0; i < terms.length; i++) {
                count[terms[i]] = results[i][0].query.searchinfo.totalhits;
                // give some weight to score, but more to Latin-ness, with a small x/1000 addition to sort 0-count items properly
                score[terms[i]] = Math.log10(count[terms[i]] + 1) * matches[terms[i]] * matches[terms[i]] + (matches[terms[i]]/1000);
            }
            terms = terms.sort(function(a, b) {
                if (sortbyscore) {
                    return score[b] - score[a];
                }
                return count[b] - count[a];
            });
            for (i = 0; i < terms.length; i++) {
                if (score[terms[i]] <= 0 && viceversa == 0) {
                    continue;
                }
                if (viceversa == 1 || matches[terms[i]] >= 0.5) {
                    var display = colorizeString(terms[i]);
                    if (resultHTML) {
                        resultHTML += ' — ';
                    }
                    resultHTML += "<a href='#' style='color:black' onclick='copyToClipboard(\"" + terms[i] + "\"); getHHTitles(\"" + terms[i] + "\")'>" + display + "</a> (" + count[terms[i]] + ")";
                }
            }
            if ('' === resultHTML) {
                resultHTML = "Nothing found.";
            }
            $('#HHStatus').html('');
            $('#HHMixedWords').html(resultHTML);
        }, reportFailure );
        // As in the original, the lock is released once the count queries have been dispatched.
        slowFetch = 0;
    } );
    return;
}
/**
 * Wrap every character of `str` in a colored <span> according to its script:
 *   Latin homoglyph   -> #00F, other Latin    -> #AAF
 *   Cyrillic homoglyph -> #F00, other Cyrillic -> #FAA
 * Characters of neither script are copied through unchanged (and unescaped).
 *
 * Restores the per-character indexing (str[i]) and the array initialiser that were missing
 * from the damaged original. String.prototype.match is used on purpose: it resets the
 * lastIndex of these shared /g regexes, so repeated calls do not interfere with each other.
 *
 * @param {string} str - text to colorize
 * @returns {string} HTML markup
 */
function colorizeString(str) {
    var pieces = [];
    for (var i = 0; i < str.length; i++) {
        var ch = str[i];
        if (ch.match(LatOneRegex)) {
            // Latin
            if (ch.match(LatHomoglyphRegex)) {
                // Latin homoglyph
                pieces.push("<span style='color:#00F'>" + ch + "</span>");
            }
            else {
                pieces.push("<span style='color:#AAF'>" + ch + "</span>");
            }
        }
        else if (ch.match(CyrOneRegex)) {
            // Cyrillic
            if (ch.match(CyrHomoglyphRegex)) {
                // Cyrillic homoglyph
                pieces.push("<span style='color:#F00'>" + ch + "</span>");
            }
            else {
                pieces.push("<span style='color:#FAA'>" + ch + "</span>");
            }
        }
        else {
            // Neither Latin nor Cyrillic: pass through as-is
            pieces.push(ch);
        }
    }
    return pieces.join('');
}
/**
 * Copy a string to the clipboard by selecting it inside a temporary <input> element
 * and issuing the document-level "copy" command; the element is removed afterwards.
 */
function copyToClipboard(string) {
    const $scratch = $("<input>");
    const $body = $("body");
    $body.append($scratch);
    // The value has to be selected for the "copy" command to pick it up.
    const $filled = $scratch.val(string);
    $filled.select();
    document.execCommand("copy");
    $scratch.remove();
}
// invert one map into another; in both directions remove keys (but not values) with length > 1
/**
 * For every key/value pair of `src`:
 *   - if the VALUE is a single character, record the reverse pair in `dest` (value -> key),
 *     so `dest` never gets a key longer than one character;
 *   - if the KEY is longer than one character, delete it from `src`.
 * Both objects are modified in place. Restores the property indexing (src[key], dest[value])
 * that was missing from the damaged original.
 *
 * @param {Object<string,string>} src  - map to invert (and to prune)
 * @param {Object<string,string>} dest - receives the inverted pairs
 */
function invertAndLengthFilter(src, dest) {
    // Snapshot the keys first so deleting from src cannot disturb the iteration.
    Object.keys(src).forEach(function (key) {
        var mapped = src[key];
        if (mapped.length === 1) {
            dest[mapped] = key;
        }
        if (key.length > 1) {
            delete src[key];
        }
    });
}
// map all available characters in string from one script to another based on a given map
/**
 * Replace each character of `str` that has a (truthy) entry in `map` with that entry;
 * all other characters are kept as they are. Works per UTF-16 code unit.
 * Restores the array initialiser and the map[str[i]] lookup that were missing from the
 * damaged original.
 *
 * @param {string} str - text to convert
 * @param {Object<string,string>} map - character -> replacement
 * @returns {string} converted text
 */
function convertScript(str, map) {
    var converted = [];
    for (var i = 0; i < str.length; i++) {
        var ch = str[i];
        converted.push( map[ch] || ch );
    }
    return converted.join('');
}
// percent-encode quotes: every ' becomes %27 and every " becomes %22, so the text can sit
// inside quoted HTML attributes and inline handlers (other characters are left untouched)
function quoteEsc (theString) {
    return theString
        .replace(/'/g, "%27")
        .replace(/"/g, "%22");
}
// reverse of quoteEsc: turn every %27 back into ' and every %22 back into "
// (all other percent sequences are left as they are)
function quoteUnesc (theString) {
    return theString
        .replace(/%27/g, "'")
        .replace(/%22/g, '"');
}
// when everything is loaded, add the Homoglyph Hunter link
// Waits for both the required ResourceLoader modules and DOM ready, then adds a
// "Homoglyph Hunter" entry to the toolbox portlet ('p-tb') that opens the overlay.
// NOTE(review): the dependency list was missing from the copy this was restored from; it is
// reconstructed from what this file uses (mw.util.addPortletLink, mw.Api) -- confirm.
$.when( mw.loader.using( [ 'mediawiki.util', 'mediawiki.api' ] ), $.ready ).then( function() {
    var portletLink = mw.util.addPortletLink( 'p-tb', '#', 'Homoglyph Hunter' );
    $( portletLink ).on( 'click', function ( e ) {
        // The link target is '#'; keep the browser from jumping to the top of the page.
        e.preventDefault();
        initialize_HHunter();
    });
});