/**
 * Page-authors gadget: loads every revision of the current page through the
 * MediaWiki API, measures how much each revision changed the text (distance
 * between character-trigram count vectors) and attributes that change to the
 * user who made the revision. The result is shown in the jsMsg message box.
 *
 * NOTE(review): the copy of this script that was reviewed had lost every
 * bracketed index expression ("arr[i]" had become "arr", "revids[x.revid]"
 * had become "revids.revid]", ...). The indices below were reconstructed from
 * context. Places where the reconstruction is a judgement call are marked
 * NOTE(review) and should be confirmed against a pristine copy.
 */
var wgPageAuthors = {
  // Parameters of the API request; continuation parameters are merged in
  // between requests by getRevisions().
  query: {
    action: 'query',
    prop: 'revisions',
    // typeof guard: lets the object be defined even where the legacy global
    // is not exposed (the query is only used once onclick() runs).
    titles: (typeof wgPageName === 'undefined') ? null : wgPageName,
    rvlimit: 10,
    rvprop: 'ids|timestamp|user|size|content',
    format: 'json'
  },
  // jQuery selector of the message box created by jsMsg(..., 'page-authors')
  box: '#mw-js-message.mw-js-message-page-authors',
  inProgress: false, // true while revisions are loaded and analysed
  isOpen: false,     // true while the result box is shown
  revisions: [],     // digested revisions in the order the API returned them
  str: null,         // cached result HTML; null until setResult() has run

  /**
   * Formats a 32 bit integer as unpadded lower-case hex.
   * Negative values are mapped to their unsigned two's complement first.
   */
  toHex: function (n) {
    if (n < 0) n = 0xFFFFFFFF + n + 1;
    return n.toString(16);
  },

  /**
   * Fowler-Noll-Vo hash (32 bit FNV-1: multiply, then xor) of a string.
   * Returns a signed 32 bit integer; use toHex() for a printable digest.
   */
  fnv: function (str) {
    var hash = 2166136261; // the 32 bit offset basis
    for (var i = 0; i < str.length; i++) {
      // hash * 16777619 (the FNV prime) written as shifts and adds
      hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
      hash = hash ^ str.charCodeAt(i);
    }
    return hash & 0x0ffffffff;
  },

  // byte permutation table used for the Pearson hash in lsh()
  perm: [
    1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
    87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
    49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
    12, 181, 103, 70, 22, 58, 75, 78, 183, 167, 238, 157, 124, 147, 172, 144,
    176, 161, 141, 86, 60, 66, 128, 83, 156, 241, 79, 46, 168, 198, 41, 254,
    178, 85, 253, 237, 250, 154, 133, 88, 35, 206, 95, 116, 252, 192, 54, 221,
    102, 218, 255, 240, 82, 106, 158, 201, 61, 3, 89, 9, 42, 155, 159, 93,
    166, 80, 50, 34, 175, 195, 100, 99, 26, 150, 16, 145, 4, 33, 8, 189,
    121, 64, 77, 72, 208, 245, 130, 122, 143, 55, 105, 134, 29, 164, 185, 194,
    193, 239, 101, 242, 5, 171, 126, 11, 74, 59, 137, 228, 108, 191, 232, 139,
    6, 24, 81, 20, 127, 17, 91, 92, 251, 151, 225, 207, 21, 98, 113, 112,
    84, 226, 18, 214, 199, 187, 13, 32, 94, 220, 224, 212, 247, 204, 196, 43,
    249, 236, 45, 244, 111, 182, 153, 136, 129, 90, 217, 202, 19, 165, 231, 71,
    230, 142, 96, 227, 62, 179, 246, 114, 162, 53, 160, 215, 205, 180, 47, 109,
    44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
    163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120, 209 ],

  /**
   * Locality sensitive hash of a text, loosely similar to nilsimsa.
   * Similar texts give hashes that differ in few bits (compare with beans()).
   * Returns a signed 32 bit integer.
   */
  lsh: function (str) {
    var perm = wgPageAuthors.perm;
    var arr = [];
    var i;
    for (i = 0; i <= 0x0ff; i++) {
      arr[i] = 0;
    }
    for (i = 0; i < str.length - 2; i++) {
      // Pearson hash of the trigram starting at i.
      // NOTE(review): reconstructed. The "& 0xff" keeps the table index in
      // range for code units above 255; without it the lookup is undefined.
      var hash = 0;
      hash = perm[(hash ^ str.charCodeAt(i)) & 0xff];
      hash = perm[(hash ^ str.charCodeAt(i + 1)) & 0xff];
      hash = perm[(hash ^ str.charCodeAt(i + 2)) & 0xff];
      // count the trigram in the bucket selected by its hash
      arr[hash]++;
    }
    // fold the 256 buckets into the first 32
    // NOTE(review): reconstructed as a stride-32 fold; confirm the stride.
    for (i = 0; i <= 0x01f; i++) {
      arr[i] += arr[i + 0x20] + arr[i + 0x40] + arr[i + 0x60] + arr[i + 0x80] +
        arr[i + 0xa0] + arr[i + 0xc0] + arr[i + 0xe0];
    }
    // one bit per folded bucket: set when the bucket does not exceed the median
    // NOTE(review): the element picked from the sorted copy was lost; the
    // upper median of all 32 folded buckets is used here.
    var acc = 0;
    var lim = arr.slice(0, 0x20).sort(function (a, b) { return a - b; })[0x10];
    for (i = 0; i <= 0x01f; i++) {
      acc |= (((lim < arr[i]) ? 0 : 1) << i);
    }
    return acc & 0x0ffffffff;
  },

  /**
   * Counts the bits set in a 32 bit integer (population count).
   * From Hacker's Delight, p. 66, Figure 5-2.
   */
  beans: function (n) {
    // make an unsigned 32 bit int
    if (n < 0) n = 0xFFFFFFFF + n + 1;
    n = n - ((n >> 1) & 0x55555555);
    n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
    n = (n + (n >> 4)) & 0x0F0F0F0F;
    n = n + (n >> 8);
    n = n + (n >> 16);
    return n & 0x0000003F;
  },

  /**
   * Difference measure for two trigram count vectors: despite the name this
   * is the Euclidean length of (vec1 - vec2), where a key missing from one
   * vector counts as 0. Returns 0 for identical vectors.
   */
  cosine: function (vec1, vec2) {
    var combined = {};
    var x;
    for (x in vec1)
      combined[x] = vec1[x];
    for (x in vec2) {
      if (combined[x] == null) combined[x] = 0;
      combined[x] -= vec2[x];
    }
    var acc = 0;
    for (x in combined)
      acc += combined[x] * combined[x];
    return Math.sqrt(acc);
  },

  /** Escapes text for insertion into HTML element content or attributes. */
  escapeHtml: function (text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  },

  /**
   * Builds the URL of a wiki page from wgArticlePath. A replacer function is
   * used so that "$&", "$1" and similar sequences in the title are inserted
   * literally instead of being interpreted by String.replace.
   */
  pageUrl: function (title) {
    return wgArticlePath.replace('$1', function () { return title; });
  },

  /** Shows a plain-text progress message in the message box. */
  progress: function (message) {
    $(wgPageAuthors.box + ' #progress').text(message);
  },

  /**
   * Click handler of the portlet link. The first click starts loading; once a
   * result exists, further clicks toggle the result box.
   */
  onclick: function () {
    if (wgPageAuthors.str === null) {
      if (!wgPageAuthors.inProgress) {
        wgPageAuthors.inProgress = true;
        wgPageAuthors.isOpen = true;
        jsMsg('<h3>Forfattere</h3><small id="progress" />', 'page-authors' );
        $(wgPageAuthors.box + ' h3').each(function (i, el) { injectSpinner( el, 'page-authors' ); });
        wgPageAuthors.getRevisions();
      }
      else {
        alert('Already in progress!');
      }
    }
    else {
      if (!wgPageAuthors.isOpen) {
        wgPageAuthors.isOpen = true;
        jsMsg('<h3>Forfattere</h3>' + wgPageAuthors.str, 'page-authors' );
        $(wgPageAuthors.box).show().find('ul').slideDown(1000);
      }
      else {
        wgPageAuthors.isOpen = false;
        $(wgPageAuthors.box).slideUp(1000);
      }
    }
  },

  /**
   * Digests one revision object from the API and appends it to revisions.
   * Adds lsh, fnv (hex digest) and vector (trigram counts) to the revision
   * and drops the text itself to conserve space.
   */
  addRevision: function (rev) {
    // content is absent when it could not be delivered; treat it as empty
    var text = (typeof rev['*'] === 'string') ? rev['*'] : '';
    rev.lsh = wgPageAuthors.lsh(text);
    rev.fnv = wgPageAuthors.toHex(wgPageAuthors.fnv(text));
    if (typeof rev.size == 'undefined') rev.size = text.length;
    var vec = {};
    for (var i = 0; i < text.length - 2; i++) {
      var k = text.substr(i, 3);
      if (!vec[k]) vec[k] = 0;
      vec[k]++;
    }
    rev.vector = vec;
    rev['*'] = null; // conserve space
    wgPageAuthors.revisions.push(rev);
  },

  /**
   * Requests one batch of revisions and calls itself again until the API
   * reports no continuation; then hands over to setResult().
   */
  getRevisions: function () {
    $.getJSON(wgScriptPath + "/api.php", wgPageAuthors.query, function (data) {
      // NOTE(review): the page key was lost; the query names a single title,
      // so every page object in the reply is walked instead of one fixed id.
      var pages = (data && data.query && data.query.pages) || {};
      for (var id in pages) {
        var batch = pages[id].revisions || [];
        for (var x = 0; x < batch.length; x++) {
          wgPageAuthors.addRevision(batch[x]);
        }
      }
      // continuation: the current "continue" object, or the legacy
      // "query-continue" block for the revisions module
      var more = null;
      if (data && data['continue']) {
        more = data['continue'];
      }
      else if (data && data['query-continue'] && data['query-continue'].revisions) {
        more = data['query-continue'].revisions;
      }
      if (more) {
        wgPageAuthors.progress(wgPageAuthors.revisions.length + ' revisions loaded…');
        for (var key in more) wgPageAuthors.query[key] = more[key];
        wgPageAuthors.getRevisions();
      }
      else {
        wgPageAuthors.setResult();
      }
    });
  },

  /**
   * Final step after all revisions are loaded: links each revision to the
   * revision it is measured against, sums the change measure per user and
   * renders the result into the message box and into str.
   */
  setResult: function () {
    var self = wgPageAuthors;
    var has = Object.prototype.hasOwnProperty;
    self.progress('All revisions loaded…');
    var digests = {};  // fnv digest -> latest revision seen with that digest
    var users = {};    // user name -> { cosine: accumulated measure }
    var revids = {};   // revid -> revision
    var revs = self.revisions;
    var i, j, x, name, revision, previous;

    for (i = 0; i < revs.length; i++) revids[revs[i].revid] = revs[i];

    // Walk from the end of the list to the start; a revision whose digest was
    // already seen is identical to that earlier-walked revision and points to it.
    self.progress('Calculate FNV-digests…');
    var shunted = 0;
    for (i = revs.length - 1; 0 <= i; i--) {
      if (digests[revs[i].fnv]) {
        revs[i].previousid = digests[revs[i].fnv].revid;
        shunted++;
      }
      digests[revs[i].fnv] = revs[i];
    }

    // cache the measure and set previousid to reflect the span
    self.progress('Calculate cosine measure…');
    i = 0;
    while (i < revs.length) {
      j = i;
      var targetId = revs[j].previousid || revs[j].parentid;
      // a parent that was never loaded falls through to the neighbour below
      var target = targetId ? revids[targetId] : null;
      if (target) {
        revs[j].cosine = self.cosine(revs[j].vector, target.vector);
        // skip everything between this revision and its target; the bound is
        // tested first so the scan cannot read past the end of the list
        while (i < revs.length && revs[i].revid > target.revid) i++;
      }
      else if (i + 1 < revs.length) {
        revs[j].cosine = self.cosine(revs[j].vector, revs[i + 1].vector);
        i++;
      }
      else {
        // last revision in the list: measured against an empty text
        revs[j].cosine = self.cosine(revs[j].vector, {});
        revs[j].previousid = 0;
        break;
      }
      if (i !== j && i < revs.length) revs[j].previousid = revs[i].revid;
      if (i === j) i++; // always make progress, even on unordered revids
    }

    // If the lsh of the chosen predecessor is too far off (more than 5 bits),
    // look up to 16 steps along the chain for a closer one.
    self.progress('Adjust cosine measure…');
    var similar = 0;
    revision = revs[0];
    while (revision) {
      previous = revids[revision.previousid];
      if (previous /* && revision.size > 100 */ && 5 < self.beans(revision.lsh ^ previous.lsh)) {
        var p = previous;
        var lsh = 256;
        var keep = null;
        for (j = 0; j < 16 && p; j++) {
          var tmp = self.beans(revision.lsh ^ p.lsh);
          if (lsh > tmp) {
            lsh = tmp;
            keep = p;
          }
          p = revids[p.previousid];
        }
        if (keep && keep != previous) {
          previous = keep;
          similar++;
        }
      }
      if (previous) {
        revision.previousid = previous.revid;
        revision.cosine = self.cosine(revision.vector, previous.vector);
      }
      revision = previous;
    }

    // accumulate the measure for each user along the chain
    self.progress('Accumulate cosine measure…');
    revision = revs[0];
    var num = 0;
    while (revision) {
      if (!has.call(users, revision.user)) users[revision.user] = { cosine: 0 };
      users[revision.user].cosine += revision.cosine || 0;
      revision = revids[revision.previousid];
      num++;
    }

    // accumulate the total measure
    var acc = { cosine: 0 };
    for (x in users) acc.cosine += users[x].cosine;
    // share of the total in whole percent; 0 when nothing was measured
    var percent = function (value) {
      return acc.cosine ? Math.round(100 * value / acc.cosine) : 0;
    };
    // user of the last revision in the list (the page creator with the API's
    // default newest-first order)
    var creator = revs.length ? revs[revs.length - 1].user : null;

    // Main authors: the five largest contributors with a non-zero share,
    // anyone with at least 5 %, and the page creator.
    self.progress('Sort out main authors…');
    var authors = [];
    var pseudonyms = [];
    var count = 0;
    for (x in users) pseudonyms.push(x);
    // NOTE(review): sort direction reconstructed as largest share first.
    pseudonyms.sort(function (a, b) { return users[b].cosine - users[a].cosine; });
    for (i = 0; i < pseudonyms.length; i++) {
      name = pseudonyms[i];
      var cosine = percent(users[name].cosine);
      if (count++ < 5 && 0 < cosine) authors.push(name);
      else if (5 <= cosine) authors.push(name);
      else if (creator == name) authors.push(name);
    }

    // print the measure for each main author
    self.progress('Create presentation…');
    var str = '<ul style="display:none">';
    var part = 0;
    for (i = 0; i < authors.length; i++) {
      name = authors[i];
      part += users[name].cosine;
      str += '<li>';
      str += '<a href="' + self.pageUrl('User:' + encodeURI(name)) + '">' + self.escapeHtml(name) + '</a>';
      str += ' (';
      str += '<a href="' + self.pageUrl('User_talk:' + encodeURI(name)) + '">diskusjon</a>';
      str += ' | ';
      str += '<a href="' + self.pageUrl('Special:Contributions/' + encodeURI(name)) + '">bidrag</a>';
      str += ') ';
      str += ' <em>';
      str += 'Har bearbeidet ' + percent(users[name].cosine) + '%' + ' av artikkelen';
      if (creator == name) str += ' og er artikkeloppretter';
      str += '.</em>';
      str += '</li>';
    }
    str += '</ul>';
    str += '<small>';
    str += 'Bidrag fra andre forfattere er ' + percent(acc.cosine - part) + '%. ';
    str += 'Utelatte revisjoner og feiltolkede bidrag kan gi avvik. ';
    str += 'Det er påvist ' + shunted + ' identiske revisjoner, og søkt ' + similar + ' ganger etter forutgående lignende revisjoner på grunn av store endringer. Dette kan også gi forkastede mellomliggende revisjoner. ';
    str += 'Det er brukt ' + num + ' revisjoner av de totalt ' + revs.length + ' som ble lastet, det vil si at ' + (revs.length - num) + ' mellomliggende revisjoner ble forkastet i analysen.<br />';
    str += 'Se <a href="http://no.wikipedia.org/wiki/Hjelp:Forfattere_av_sider">hjelpesiden</a> for ytterligere forklaring.';
    str += '</small>';
    self.str = str;

    $(self.box + ' #progress').remove();
    $(self.box + ' h3').each(function (i, el) { removeSpinner( 'page-authors' ); });
    $(self.box).append(str);
    $(self.box + ' ul').slideDown(1000);
    self.inProgress = false;
  }
}
// Bootstrap: register the "Forfattere" portlet link.
// The link is only added when the namespace number is non-negative and even,
// when wgIsArticle is set, and when the engine has at least 32 bit integers.
// The last condition is what (-1 >>> 1) >= 2147483647 verifies: the standard
// requires 32 bit integers, an implementation may use more but not fewer.
if (wgNamespaceNumber >= 0 && wgNamespaceNumber % 2 == 0) {
  if (wgIsArticle && (-1 >>> 1) >= 2147483647) {
    try {
      // NOTE(review): this tests for $j while the gadget itself calls $ —
      // confirm which alias the target wiki provides.
      if (typeof($j) == 'undefined') {
        mw.loader.load("//bits.wikimedia.org/skins-1.5/common/jquery.min.js");
      }
      addOnloadHook(function () {
        mw.util.addPortletLink(
          "p-cactions",
          'javascript:wgPageAuthors.onclick("page-authors")',
          "Forfattere",
          "t-article-authors",
          "Identifiser forfattere av siden",
          null,
          null
        );
      });
    }
    catch (ignored) {
      // deliberate best effort: a failure here must not break other page scripts
    }
  }
}