From Zakład Logiki Stosowanej
# to_txt($html)
#
# Extracts title, author, date and article body from one HTML page and
# prints them as plain text.
#
#   $html - page source as ISO-8859-2 encoded bytes (it is decoded here).
#
# The page must contain, in this order, <h1>title</h1>,
# <h5 class="author">, <h6 class="date">, and later an <h4> element
# followed by <div id="artykul">.  The <h4> element has to be present for
# the article body to be located, but its text is not part of the output.
#
# The author/title/date header is written to both STDERR and STDOUT; the
# article body is written to STDOUT only.  Nothing is printed when any
# required element is missing or when author, title or body is empty.
#
# Returns 1 when a record was printed, and nothing (false) otherwise.
#
# NOTE(review): the decoded text is printed as characters; confirm that the
# caller sets an output encoding layer on STDOUT/STDERR.
sub to_txt {
    my $text = shift;
    return unless defined $text;

    $text = decode("iso-8859-2", $text);

    # Flatten the page to one line so that the only newlines in the output
    # are the ones created from <br> tags below.
    $text =~ s/\n/ /g;

    # Capture variables are only meaningful after a *successful* match, so
    # every match is checked before $1..$3 are read.  No /g here: in scalar
    # context it would only leave pos() set for later matches.
    my ($title, $author, $date);
    if ($text =~ m{<h1>(.*?)</h1>.*?<h5\sclass="author">(.*?)</h5>.*?<h6\sclass="date">(.*?)</h6>}si) {
        ($title, $author, $date) = ($1, $2, $3);
    }
    else {
        return;
    }

    # Scripts are dropped before the body is located so that markup inside
    # them cannot be picked up by the match below.
    $text =~ s/<script.*?<\/script>//gsi;

    # The greedy .* selects the last <div id="artykul"> that follows the
    # <h4> element; the body then runs up to the first closing </div>.
    my $content;
    if ($text =~ m{<h4>.*?</h4>.*<div id="artykul">(.*?)</div>}si) {
        $content = $1;
    }
    else {
        return;
    }

    $content =~ s/<br>/\n/gs;

    # Strip the sub-heading wrapper but keep the heading text itself.
    $content =~ s/<span class="txt_srodtytul"><b>//gs;
    $content =~ s/<\/b><\/span>//gs;

    return unless $author and $content and $title;

    # Same header on both streams, STDERR first.
    for my $handle (\*STDERR, \*STDOUT) {
        print {$handle} "\nAuthor:\t $author\n";
        print {$handle} "Title:\t $title\n";
        print {$handle} "Date:\t $date\n\n";
    }
    print "Content:\n $content\n";

    return 1;
}