#!/usr/bin/perl -w
#
# file: build/htmlpostprocess
# purpose: postprocess html for my homepage
# created: pasha oct 29 2000
# modified: pasha sep 24 2007
# modification: border="0" in (tag name lost, see NOTE below)
# synopsis: htmlpostprocess --webroot= --pagedir= < input > output
# pending: strip html comments
# notes:
#
# NOTE(review): the copy of this file that was reviewed had been run through
# an HTML tag stripper: every "<...>" span was removed, including the markup
# inside string literals.  Literals below marked "NOTE(review)" keep exactly
# the text that survived; the lost markup (hrefs, icon paths, closing tags)
# must be restored from an intact copy before this script is used.
#
# it does 12 things (tag names below are taken from the dispatch in start()):
#  1. inserts width and height attributes of <img> tag
#  2. adds type="square" to <ul> tag
#  3. converts translit to cyrillic inside <koi8> and <win> tags
#     (my own invention)
#  4. substitutes <down> with the thumbdown icon
#  5. converts <amazon> tag to amazon associates link
#  6. converts <ozon> tag to ozon associates link
#  7. converts <exclam> tag to a red exclamation mark sign
#  8. converts <wikipedia> tag to a small wikipedia image
#  9. converts <rss> tag to link to rss feed with xml icon
# 10. converts <mr> tag to MR link
# 11. converts <zbl> tag to Zbl link
# 12. converts <gprint> tag to a google print link
#

use strict;
use Getopt::Long;
use pasha::common;

######### define subclass of HTML::Filter ###########
package homepagefilter;

require HTML::Filter;
use vars qw(@ISA);
@ISA = qw(HTML::Filter);

use GD;
use translit2koi8;

# 6 characters (gif version), and 2 unsigned shorts (width & height).
# 'v' is an explicitly little-endian 16-bit value, which is what the GIF
# header stores; the native-endian 'S' gives wrong sizes on big-endian hosts.
use constant GIF_HEADER_STRUCT => 'C6v2';
# pack with explicit zeros so that -w does not warn about missing arguments
use constant GIF_HEADER_LENGTH => length (pack (GIF_HEADER_STRUCT, (0) x 8));

# constructor: builds the parent filter object and resets the nesting
# counters for the <koi8> and <win> pseudo-tags
sub new {
    my $class = shift;
    my $this = $class->SUPER::new (@_);
    for (qw(inside_koi8 inside_win)) {
        $this->{$_} = 0;
    }
    return ($this);
}

# start-tag handler.
# input - ($this, $tag, $attr, <unused>, $orig): tag name, hash ref of its
#         attributes and the original text of the tag
# Rewrites $orig for the tags this filter knows about and passes the result
# to output(); every other tag goes through unchanged.
sub start {
    my ($this, $tag, $attr, undef, $orig) = @_;

    for ($tag) {
        /^img$/ && do {
            ######### process <img> tag ###########
            # don't process remote images (http or https), and don't touch
            # images that already carry both dimensions
            if ((exists ($attr->{'src'}))
                && ($attr->{'src'} !~ m{^https?://})
                && ((! exists ($attr->{'width'}))
                    || (! exists ($attr->{'height'})))) {
                my ($width, $height) = get_size (web2path ($attr->{'src'}));
                my $add = '';
                if (! exists ($attr->{'width'})) {
                    $add .= " width=\"$width\"";
                }
                if (! exists ($attr->{'height'})) {
                    $add .= " height=\"$height\"";
                }
                # insert before the closing ">" (or "/>")
                $orig =~ s/(\/|)>$/${add} $1>/;
            }
            last;
        };
        /^ul$/ && do {
            ######### process <ul> tag ############
            if (! exists $attr->{'type'}) {
                $orig =~ s/>$/ type="square">/;
            }
            last;
        };
        /^koi8$/ && do {
            # pseudo-tag: swallow it and remember we are inside
            $this->{'inside_koi8'}++;
            $orig = '';
            last;
        };
        /^win$/ && do {
            $this->{'inside_win'}++;
            $orig = '';
            last;
        };
        /^down$/ && do {
            # NOTE(review): markup lost from this literal
            $orig = '[down as of ' . $attr->{'date'} . ']';
            last;
        };
        /^amazon$/ && do {
            # NOTE(review): markup lost from this literal; the "$attr->"
            # in front of {'asin'} was stripped too and has been restored.
            # Whatever followed the "#" that preceded this assignment in the
            # damaged copy is lost as well.
            $orig = '
      a
      m
      a
      z
      o
      n
      ' . $attr->{'asin'} . '/ref=nosim/pashazusmansh-20">amazon cover
      ';
            last;
        };
        /^amazontxt$/ && do {
            # NOTE(review): markup lost from this literal
            $orig = '';
            last;
        };
        /^ozon$/ && do {
            # NOTE(review): markup lost from this literal
            $orig = '
      o
      z
      o
      n
      ozon cover
      ';
            last;
        };
        /^exclam$/ && do {
            # NOTE(review): markup lost from this literal
            $orig = '[!] ';
            last;
        };
        /^wikipedia$/ && do {
            # NOTE(review): markup lost from this literal
            $orig = '';
            last;
        };
        /^rss$/ && do {
            die ('no href attribute for rss tag')
                if (! defined ($attr->{'href'}));
            # NOTE(review): markup lost from this literal
            $orig = ' 
      ';
            last;
        };
        /^mr$/ && do {
            # "#" must be escaped as %23 inside a URL.
            # NOTE(review): $hash_to_23 was presumably interpolated into the
            # href that was stripped from the literal below - confirm.
            (my $hash_to_23 = $attr->{'mr'}) =~ s/#/%23/;
            $orig = "MR " . $attr->{'mr'} . '';
            last;
        };
        /^zbl/ && do {
            # old:
            #$orig = '';
            # new:
            # NOTE(review): markup lost from both literals
            $orig = '';
            last;
        };
        /^gprint/ && do {
            # it could be in two forms: with a vid or with an id attribute
            my $set = 0;
            for my $key (qw(vid id)) {
                if (defined ($attr->{$key})) {
                    # NOTE(review): the opening markup and the "$attr->"
                    # in front of the attribute lookup were stripped; only
                    # the "$attr->" has been restored.
                    $orig = $attr->{$key} . '">google books';
                    $set = 1;
                    last;
                }
            }
            if ($set == 0) {
                ::me_die ('<gprint> tag without either vid or id attribute');
            }
            last;
        };
    } # end of switch thru the custom tags

    $this->output ($orig);
} # end of start()

# end-tag handler: closes the pseudo-tags opened in start(), everything else
# is delegated to the parent class
sub end {
    my $this = shift;

    for ($_[0]) {
        /^koi8$/ && do {
            $this->{'inside_koi8'}--;
            last;
        };
        /^win$/ && do {
            $this->{'inside_win'}--;
            last;
        };
        /^amazontxt$/ && do {
            # NOTE(review): markup lost from this literal
            $this->output ('');
            last;
        };
        /^zbl/ && do {
            # NOTE(review): markup lost from this literal
            $this->output ('');
            last;
        };
        $this->SUPER::end (@_);
    }
}

# text handler: transliterates text found inside <koi8> or <win> before
# handing it to the parent class
sub text {
    my ($this, $text) = @_;

    if ($this->{'inside_koi8'} > 0) {
        $text = translit2koi8 ($text);
    } elsif ($this->{'inside_win'} > 0) {
        $text = translit2win ($text);
    }
    $this->SUPER::text ($text);
}

# note that the following functions are not blessed
# into this class, hence first argument isn't class reference

# get image size
# input - image filename
# output - ($width, $height) list
# dies (via ::me_die) when the file cannot be read or the size is unknown
sub get_size {
    my ($file) = @_;
    my ($width, $height);

    if (::get_ext ($file) eq 'gif') {
        # GD like any other good modern software does not support gifs,
        # but we, as the majority of other web suckers, still use them,
        # so do it ourselves
        open (my $fh, '<', $file) || ::me_die ("unable open $file");
        binmode ($fh);
        my $gif;
        my $got = read ($fh, $gif, GIF_HEADER_LENGTH);
        close ($fh);
        # a short read is as useless as a failed one
        if ((! defined ($got)) || ($got != GIF_HEADER_LENGTH)) {
            ::me_die ('unable read ' . GIF_HEADER_LENGTH
                      . " bytes from $file");
        }
        # "!~" rather than "! $gif =~": unary "!" binds tighter than "=~"
        if ($gif !~ /^GIF/) {
            ::me_die ("$file is not a gif file");
        }
        ($width, $height) = (unpack (GIF_HEADER_STRUCT, $gif))[6,7];
    } else {
        my $gd = GD::Image->new ($file)
            || ::me_die ("error opening $file with GD");
        ($width, $height) = $gd->getBounds ();
    }

    if ((! defined ($width)) || (! defined ($height))) {
        ::me_die ("unable get size of $file");
    }
    return ($width, $height);
} # end of get_size()

# transforms webserver path to real path:
# absolute paths are taken relative to $::WEBROOT,
# all other paths relative to $::PAGEDIR
sub web2path {
    my ($path) = @_;
    return (($path =~ /^\// ? $::WEBROOT : $::PAGEDIR . '/') . $path);
}

######### end of subclass of HTML::Filter ###########
package main;

# we need them to be visible in homepagefilter
use vars qw($WEBROOT $PAGEDIR);

GetOptions (
    'webroot=s' => \$WEBROOT,
    'pagedir=s' => \$PAGEDIR
);

if ((! defined ($WEBROOT)) || (! defined ($PAGEDIR))) {
    # NOTE(review): the option placeholders appear to have been stripped
    # from this usage message
    print (STDERR 'Usage: ' . ME
           . " --webroot= --pagedir= < input > output\n");
    exit (1);
}

homepagefilter->new->parse_file (*STDIN)
    || me_die ('error from HTML::Filter parsing STDIN');

__END__