#!/usr/bin/perl -w
#
# ifilepp -- ifile preprocessor: ifilepp -h for usage
#
# Copies a mail message from the files named on the command line (or from
# STDIN) to STDOUT.  The header (everything up to the first blank line) is
# passed through untouched; the body is filtered line by line:
#
#   * base64 sections are replaced by a single "BaseSixtyFour" marker
#     (disabled by -M),
#   * lines ending in "=" are joined with the following line,
#   * "&nbsp;" entities become spaces,
#   * tokens wrapped around an unknown <tag> become "BadHTML" (disabled by -B),
#   * words containing bytes 0x80-0xff become "NonEnglish" (disabled by -N),
#   * HTML tags, including ones spanning several lines, are removed
#     (disabled by -H).
#
# Once more than -m bytes (default $defmax) of body have been read, the
# remaining input is copied through without filtering.
#

use strict;
use Getopt::Std;
use vars qw($defmax);

## variable declaration
my (%opt, $bytes, $maxBytes, %validTag, $tagBuf, $lineBuf, $base64Printed);

## configuration
$defmax = 4096;    # body byte budget used when -m is not given

# Register every known tag in four spellings (lower/upper case, with and
# without a leading '/') so the BadHTML check is a plain hash lookup.
for my $tag (qw(
    html head body table td tr tbody script form input frame p ul ol dl b i a
    li center title img meta div span select option optgroup
)) {
    $validTag{uc $tag}++;
    $validTag{lc $tag}++;
    $validTag{'/' . uc $tag}++;
    $validTag{'/' . lc $tag}++;
}

## startup and options
$0 =~ s,.*/,,;    # reduce $0 to the bare program name
getopts('hHBMNm:', \%opt);
usage() if $opt{h};

## initialization
$maxBytes      = $opt{m} || $defmax;
$tagBuf        = '';    # text of an HTML tag opened but not yet closed
$lineBuf       = '';    # text of "="-terminated lines awaiting their continuation
$bytes         = 0;     # body bytes read so far
$base64Printed = 0;     # non-zero once the marker was printed for the current section

## main loop
while (<>) {
    ## skip header
    # A constant flip-flop operand is compared against $., so this range is
    # true from input line 1 through the first empty line.
    if (1 .. /^\r?$/) {
        print;
        next;
    }

    ## max size
    if (($bytes += length()) > $maxBytes) {
        # Flush whatever is buffered, then the line that crossed the limit
        # (it used to be dropped), then stream the rest of the input instead
        # of slurping it into memory.
        print $tagBuf, $lineBuf, $_;
        print while <>;
        exit;
    }

    ## skip base64 data
    unless ($opt{M}) {
        if (/^Content-Transfer-Encoding:\s+base64/i .. /^--/) {
            # One marker per base64 section; the section's lines are dropped.
            print "BaseSixtyFour\n" unless $base64Printed++;
            next;
        }
        $base64Printed = 0;
    }

    ## join ='ed lines
    if (/=\r?$/) {
        # Keep everything before the trailing "=" and wait for the next line.
        $lineBuf .= $`;
        next;
    }
    elsif (length($lineBuf)) {
        $_       = $lineBuf . $_;
        $lineBuf = '';
    }

    ## Limited Entity Substitution
    # The pattern must name the entity: a bare / ?/ also matches the empty
    # string and would insert a space between every pair of characters.
    s/&nbsp;?/ /gi;

    ## BadHTML
    # A known tag is left alone ($& is the whole match); otherwise the tag is
    # removed and the text that touched it is followed by a BadHTML marker.
    s/(\S*)<([^\s>]*)>(\S*)/$validTag{$2} ? $& : " $1$3 BadHTML "/ge
        unless $opt{B};

    ## NonEnglish
    s/\b\w*[\x80-\xff]+\w*\b/ NonEnglish /g unless $opt{N};

    ## HTML tags
    unless ($opt{H}) {
        ## complete HTML tags
        s/<[^>]+>//g;

        ## close HTML tags
        if (/^[^<]+>/ && length($tagBuf)) {
            # The pending tag ends on this line: discard it and keep only
            # the text after the closing ">".
            $tagBuf = '';
            $_      = $';
        }

        ## open HTML tags
        if (/<[^>]+$/) {
            # A new tag opens before the pending one was closed, so the
            # pending text is emitted rather than discarded.
            if (length($tagBuf)) {
                print $tagBuf;
                $tagBuf = '';
            }
            $tagBuf = $&;
            $_      = $`;
        }
        elsif (length($tagBuf)) {
            # Still inside a multi-line tag: buffer the whole line.
            $tagBuf .= $_;
            next;
        }
    }

    print;
}

# Emit text still buffered at end of input (an unterminated tag or a trailing
# "=" continuation), as the size-limit path above does, instead of losing it.
print $tagBuf, $lineBuf;

## subroutines
sub usage { die <