_ _    _ _____  ___   __                       
 __      _(_) | _(_)___ / ( _ ) / /_   ___ ___  _ __ ___  
 \ \ /\ / / | |/ / | |_ \ / _ \| '_ \ / __/ _ \| '_ ` _ \ 
  \ V  V /| |   <| |___) | (_) | (_) | (_| (_) | | | | | |
   \_/\_/ |_|_|\_\_|____/ \___/ \___(_)___\___/|_| |_| |_|

Benutzer:Jah/hauptautoren.js

In der heutigen Welt ist Benutzer:Jah/hauptautoren.js ein Thema von ständigem Interesse und Diskussionen. Seit seiner Entstehung hat es die Aufmerksamkeit von Millionen Menschen auf sich gezogen und zahlreiche Diskussionen und Entscheidungen in verschiedenen Bereichen beeinflusst. Seine Bedeutung hat Grenzen überschritten und erhebliche Auswirkungen auf Gesellschaft, Wirtschaft und Kultur gehabt. Während wir Benutzer:Jah/hauptautoren.js weiter erforschen und verstehen, wird die Notwendigkeit deutlich, es aus mehreren Perspektiven und mit einem kritischen Ansatz anzugehen. In diesem Artikel werden wir die verschiedenen Aspekte und Realitäten untersuchen, die Benutzer:Jah/hauptautoren.js umfasst, mit dem Ziel, eine umfassende und bereichernde Vision zu diesem Thema zu bieten, das in unserer heutigen Welt ein Vorher und Nachher geprägt hat.
#!/usr/bin/perl -w

use utf8;

$lws  = 5;			# length of word sequence
$lang = "de";			# en, de
$mkImg = 1;			# 1: make history image, 0: don't
$histDir = "histcache";		# where to save the revision histories
$wiki = "de.wikipedia.org";	# can be changed with e.g. "-w de.wikibooks.org"
$imgUrlDir = "http://localhost/hauptautoren";

if($^O eq "linux") {
	$imgFileDir = "/var/www/html/hauptautoren";
	$netpbmPath = "";	# pnmtopng etc. should be in the system path
} else { # Windows
	$imgFileDir = "c:\\Programme\\Apache Software Foundation\\Apache2.2\\html\\hauptautoren";
	$netpbmPath = "c:\\Programme\\GnuWin32\\bin";
}

########## end of configuration #################################

use Digest::MD5 qw(md5_hex);
use Compress::Zlib;
use Encode;
use CGI qw(:standard);
use LWP;
$ua = LWP::UserAgent->new;
use open ":utf8"; binmode STDOUT, ":utf8";
use Cwd;
use File::Spec::Functions;
use Getopt::Std;

getopts('w:');
if(defined $opt_w) {
	$wiki = $opt_w;
}

if ($lang eq "en") {
	$category = "Category";
	$image = "Image";
	$words = "words";
	$fraction = "fraction";
	$user = "user";
} elsif ($lang eq "de") {
	$category = "Kategorie";
	$image = "Bild";
	$words = "Wörter";
	$fraction = "Anteil";
	$user = "Benutzer";
} else {
	die;
}

$| = 1;

$stop = 0;
$SIG{INT} = sub { $stop = 1 };
$SIG{PIPE} = sub { $stop = 1 };

$sNr = 0; $nWordsMax = 0;

import_names;

if(defined $ARGV) {
	$title = $ARGV;
	$title =~ s/ /_/g;
	$title_md5 = md5_hex "$title";
	$cgi = 0;
	$imgFileDir = cwd;
	$imgFile = catfile($imgFileDir, "$title_md5.png");
	$imgUrl  = "file:$imgFile";
} elsif(defined $Q::page) {
	$title = $Q::page;
	$title =~ s/ /_/g;
	$title_md5 = md5_hex "$title";
	$cgi = 1;
	print header(-charset => 'utf-8');
	print "<!--\n";
	mkdir $imgFileDir if $mkImg && !-d $imgFileDir;
	$imgFile = catfile($imgFileDir, "hf.png");
	$imgUrl  = "$imgUrlDir/hf.png";
} else {
	die;
}

$allowedTags = 'b|big|blockquote|br|caption|center|cite|code|dd|del|div|dl|'.
	'dt|em|font|h1|h2|h3|h4|h5|h6|hr|i|ins|li|nowiki|ol|p|pre|ref|'.
	'references|rb|rp|rt|ruby|s|small|span|strike|strong|sub|sup|table|'.
	'td|th|tr|tt|u|ul|var';

sub progress {
	print $_;
}

$subdir = substr $title_md5, 0, 2;

$idLast = -1;
if(-f catfile($histDir, $wiki, $subdir, "$title_md5.seq")) {
	progress("Loading cached sequences ... ");
	open SEQ, catfile($histDir, $wiki, $subdir, "$title_md5.seq");
	while(<SEQ>) {
		chop;
		if(/^# (\d+)$/) {
			$idLast = $1;
		} else {
			@wordsIds = split;
			$seq = join(" ", @wordsIds);
			$id{$seq} = ) ];
		}
	}
	close SEQ;
	progress("done.\n");
}

if($mkImg && -f catfile($histDir, $wiki, $subdir, "$title_md5.idh")) {
	progress("Loading cached author attribution info ... ");
	open IDH, catfile($histDir, $wiki, $subdir, "$title_md5.idh");
	binmode IDH;
	while(!eof(IDH)) {
		read IDH, $tmp, 4;
		$id = unpack('V', $tmp);
		read IDH, $tmp, 4;
		$nIdh = unpack('V', $tmp);
		push @nWords, $nIdh;
		$nWordsMax = $nIdh if $nIdh>$nWordsMax;
		read IDH, $tmp, 4;
		$idhBinGzLen = unpack('V', $tmp);
		read IDH, $idhBinGz, $idhBinGzLen;
		($gz, $status) = inflateInit(-WindowBits => 0 - MAX_WBITS);
		($idhBin, $status) = $gz->inflate($idhBinGz);
		$status==Z_STREAM_END or die $gz->msg();
		@idh = unpack("V", $idhBin);
		push @idHist, ;
	}
	close IDH;
	progress("done.\n");
	if(int(@idHist) != $idLast+1) {
		progress("Cache corrupted, must reanalyze.\n");
		unlink catfile($histDir, $wiki, $subdir, "$title_md5.seq");
		unlink catfile($histDir, $wiki, $subdir, "$title_md5.idh");
		%id = (); $idLast = -1;
		@nWords = (); @idHist = ();
	}
}

open RI, "perl loadhistory -w $wiki '$title' |";
$id=0;
progress("Analyzing history ...\n");
#open DBG, ">md5.txt";
while($revInfo = <RI>) {
	if($revInfo =~ /^# (.*)/) {
		$msg = $1;
		progress "\t$msg\n";
		if($msg =~ /^Error/) {
			close RI;
			exit;
		}
		next;
	}

	$revInfo0 = $revInfo;
	$revInfo0 =~ /user="(.*?)"/;
	if($id % 20 == 0) {
		$revInfo0 =~ /timestamp="(....)-(..)-(..)T(..):(..):(..)Z"/;
		progress("$1$2$3$4$5$6\n");
	}
	$author = $1;
	if($id<=$idLast && (!$mkImg || defined $idHist)) {
		$id++;
		next;
	} else {
		analyzeText();
		$id++;
	}
	last if $stop;
}
#close DBG;
$id--;
analyzeText() if !defined $text0;
close RI;
if($stop) {
	progress("interrupted.\n");
	exit;
} else {
	progress("done.\n");
}

sub analyzeText {
	$revInfo0 =~ /pos=(\d+) len=(\d+)/;
	$pos = $1; $len = $2;
	open TXT, catfile($histDir, $wiki, $subdir, "$title_md5.txt");
	binmode TXT;
	seek TXT, $pos, 0;
	read TXT, $textGz, $len;
	($gz, $status) = inflateInit(-WindowBits => 0 - MAX_WBITS);
	($text, $status) = $gz->inflate($textGz);
	$status==Z_STREAM_END or die $gz->msg();
	close TXT;
#	print DBG md5_hex("$text"), "\n";
	$text = Encode::decode_utf8($text);

	# convert &lt;, &gt;, &amp; and remove (inter-)wikilinks
	$text =~ s/&lt;/</sg; $text =~ s/&gt;/>/sg; $text =~ s/&amp;/&/sg;
	$text =~ s/\\]//sg;
	$text =~ s/\\|]*?\|)?(+?)\]\]/$2/sg;
	$text =~ s/\n{3,}/\n\n/sg;
	$text0 = $text;

	# remove elements not to be colored
	while($text =~ s/\{\{((?!\{\{).)*?\}\}/ /sg) {}
	while($text =~ s/\{\|((?!\{\|).)*?\|\}/ /sg) {}
	$text =~ s/\\]/ /isg;
	$text =~ s/\*? ?\/ /isg;
	$text =~ s/\*? ?(http|ftp|mailto):\S*/ /isg;
	$text =~ s/<math>.*?<\/math>/ /sg;
	$text =~ s/<\/?($allowedTags)(\s+.{1,200}?)?\/?>/ /sg;
	$text =~ s/&(\w+|#(\d+|x+));/ /sg;

	@words = ();
	while ($text =~ /+/sg) {
		push @words, $&;
	}
	$nWords = @words;
	$nWordsMax = @words>$nWordsMax?@words:$nWordsMax;
	@id = ();
	for ($i=0; $i<@words; $i++) {
		$id = $id;
	}
	for ($i=0; $i<@words-$lws+1; $i++) {
		$seq = join(" ", @words);
		if (defined $id{$seq}) {
			for ($j=$i; $j<$i+$lws; $j++) {
				if ($id>$id{$seq}) {
					$id = $id{$seq};
				}
			}
		}
	}
	my %idNew = ();
	for ($i=0; $i<@words-$lws+1; $i++) {
		$seq = join(" ", @words);
		if (!defined $id{$seq}) {
			for ($j=$i; $j<$i+$lws; $j++) {
				$id{$seq} = $id;
				$idNew{$seq} = $id;
			}
		}
	}
	open SEQ, ">>" . catfile($histDir, $wiki, $subdir, "$title_md5.seq");
	print SEQ "# $id\n";
	foreach $seq (keys %idNew) {
		print SEQ $seq, " ", join(",", @{$idNew{$seq}}), "\n";
	}
	close SEQ;

	if($mkImg && !defined $idHist) {
		$idHist = ;
		open IDH, ">>".catfile($histDir, $wiki, $subdir, "$title_md5.idh");
		binmode IDH;
#print int(@id), "\n";
		print IDH pack('V', $id);
		print IDH pack('V', int(@id));
		$idhBin = pack('V', @id);
		($gz, $status) = deflateInit(-WindowBits => 0 - MAX_WBITS);
		($idhBinGz1, $status) = $gz->deflate($idhBin);
		($idhBinGz2, $status) = $gz->flush();
		$idhBinGz = $idhBinGz1 . $idhBinGz2;
		print IDH pack('V', do { use bytes; length $idhBinGz });
		print IDH $idhBinGz;
		close IDH;
	}
}


for ($i=0; $i<@words; $i++) {
    $words{$author]}++;
}
@authors = sort {$words{$b} <=> $words{$a}} keys %words;
for ($i=0; $i<@authors; $i++) {
    if ($i>5) {
	$color{$authors} = "#000000";
	$colorImg{$authors} = "\x00\x00\x00" if $mkImg;
    } else {
	$color{$authors} = ("#bf0000", "#00bf00", "#0000bf", "#007f7f",
				"#7f007f", "#7f7f00");
	$colorImg{$authors} = ("\xbf\x00\x00", "\x00\xbf\x00", "\x00\x00\xbf", "\x00\x7f\x7f",
				   "\x7f\x00\x7f", "\x7f\x7f\x00") if $mkImg;
    }
}

# compute history image
if ($mkImg) {
	progress("Computing image ...\n");
	open IMG, ">hf_tmp.ppm";
	binmode IMG;
	printf IMG "P6 %d %d 255\n", $id+1, $nWordsMax;
	for ($y=0; $y<$nWordsMax && !$stop; $y++) {
		print "$y/$nWordsMax\n" if $y%100==0;
		for ($x=0; $x<=$id; $x++) {
			if ($y<$nWords) {
				if (!defined $colorImg{$author]}) {
					print IMG "\x00\x00\x00";
				} else {
					print IMG $colorImg{$author]};
				}
			} else {
				print IMG "\xff\xff\xff";
			}
		}
	}
	close IMG;
	if(!$stop) {
		if(defined $netpbmPath && $netpbmPath ne "") {
			$pamscale = catfile($netpbmPath, "pamscale");
			$pnmtopng = catfile($netpbmPath, "pnmtopng");
		} else {
			$pamscale = "pamscale";
			$pnmtopng = "pnmtopng";
		}
		system "$pamscale -width 400 -height 400 hf_tmp.ppm > hf_tmp2.ppm";
		system "$pnmtopng hf_tmp2.ppm > $imgFile";
	}
	unlink "hf_tmp.ppm", "hf_tmp2.ppm";
	exit if $stop;
	progress("done.\n");
}

# mask elements not to be colored
sub subst {
	my $s = "___".$sNr++."___";
	$substBlock{$s} = $_;
	$s;
}
while($text0 =~ s/\{\{((?!\{\{).)*?\}\}/subst($&)/esg) {}
while($text0 =~ s/\{\|((?!\{\|).)*?\|\}/subst($&)/esg) {}
$text0 =~ s/\\]/subst($&)/iesg;
$text0 =~ s/\*? ?\/subst($&)/iesg;
$text0 =~ s/\*? ?(http|ftp|mailto):\S*/subst($&)/iesg;
$text0 =~ s/<math>.*?<\/math>/subst($&)/esg;
$text0 =~ s/<\/?($allowedTags)(\s+.{1,200}?)?\/?>/subst($&)/esg;
$text0 =~ s/&(\w+|#(\d+|x+));/subst($&)/esg;

# color the text
for ($i=0; $i<@words; $i++) {
	$text0 =~ s/^(.*?)$words//sg;
	$gap = $1;
	if ($i==0) {
		$coloredText = "$gap<font color=\"$color{$author]}\">$words"
	} else {
		if ($gap =~ /^\s+$/ && $author] eq $author]) {
			$coloredText .= "$gap$words"
		} else {
			$coloredText .= "</font>$gap<font color=\"$color{$author]}\">$words"
		}
	}
}
$coloredText .= "</font>$text0";

# fetch back masked elements
while ($coloredText =~ s/___\d+___/$substBlock{$&}/sg) {}

$stats = "";
if($mkImg) {
	$stats .= "<table cellspacing=\"10\">\n";
	$stats .= "<tr valign=\"top\"><td>\n";
}
$stats .= "<table cellspacing=\"0\" border=\"1\">\n";
$stats .= "<tr><th>$words</th><th>$fraction</th><th>$user</th></tr>\n";
for($i=0; $i<@authors; $i++) {
	$author = $authors;
	$stats .= sprintf "<tr><td>%5i</td><td>%4.1f%%</td><td><a href=\"http://$wikihttps://wiki386.com/de/$user:%s\" style=\"color:%s\">%s</a></td></tr>\n",
		$words{$author}, 100*$words{$author}/@words, $author, $color{$author}, $author;
	if($i<6) {
		$statsShort .= sprintf "<a href=\"http://$wikihttps://wiki386.com/de/$user:%s\" style=\"color:%s\">%s</a> (%d)%s\n",
			$author, $color{$author}, $author, $words{$author}, $i<5?";":"";
	}
}
$stats .= "</table>\n";
if ($mkImg) {
	$stats .= "</td><td><img src=\"$imgUrl\"></td></tr>\n";
	$stats .= "</table>\n";
}

#open DBG, ">hf-debug.txt";
#print DBG $coloredText;
#close DBG;
exit if $stop;

progress("Sending preview request to $wiki ... ");
$url = "http://$wiki/w/index.php?title=$title&action=submit";
$response = $ua->post( $url, [
	wpTextbox1 => Encode::encode_utf8($coloredText),
	wpPreview => "Vorschau zeigen",
]);
$html = $response->decoded_content;
if($html =~ /Quelltext betrachten/) {
	progress("page is protected.\n");
	exit if $stop;
	progress("Sending another preview request to $wiki ... ");
	$url = "http://$wiki/w/index.php?title=${title}_tmp&action=submit";
	$response = $ua->post( $url, [
		wpTextbox1 => Encode::encode_utf8($coloredText),
		wpPreview => "Vorschau zeigen",
	]);
	$html = $response->decoded_content;
}
progress("done.\n");
exit if $stop;
$html =~ s/<head>/<head><base href="http:\/\/$wiki" \/>/s;
$html =~ s/<title>Bearbeiten von (.*?) - Vorschau - Wikipedia<\/title>/<title>$1 - Wikipedia<\/title>/s;
$html =~ s/<div class='previewnote'>.*?<\/div>//s;
$html =~ s/(<div id="wikiPreview">)<h2>.*?<\/h2>/$1/s;
$html =~ s/<h1 class="firstHeading">Bearbeiten von (.*?)<\/h1>/<h1 class="firstHeading">$1<\/h1>$statsShort<hr>/s;
$html =~ s/<p>Diese Seite ist \d+ kB groß\..*?<\/p>//s;
$html =~ s/<form id="editform".*?<\/form>/<p \/><hr \/><hr \/><p>$stats<\/p>/s;

if($cgi) {
	print "-->\n";
	print $html;
} else {
	open OUT, ">$title.html";
	print OUT $html;
	close OUT;
}