#!/usr/bin/env perl
# Project: Documentation Tools
# Descr: Latex --> MAN-page (groff -man), HTML and TexInfo;
# Language: PERL (>= 5.0)
# Author: Dr. Jürgen Vollmer, Juergen.Vollmer@informatik-vollmer.de
# $Id: latex2man,v 1.151 2008/11/11 07:21:14 vollmer Exp $
#
# Copyright (C) 1998 Dr. Juergen Vollmer
# Viktoriastrasse 15, D-76133 Karlsruhe, Germany
# Juergen.Vollmer@informatik-vollmer.de
# License:
# This program can be redistributed and/or modified under the terms
# of the LaTeX Project Public License Distributed from CTAN
# archives in directory macros/latex/base/lppl.txt; either
# version 1 of the License, or any later version.
#
# If you find this software useful, please send me a postcard.
require 5.0004_03;
use Getopt::Std;
use File::Basename ();
# use strict 'vars';

# Name under which this script was invoked; used as prefix of diagnostics
# and as part of the scratch file name.  It is computed in-process: the
# former `basename $0` went through the shell, so a script path containing
# blanks or shell metacharacters was split or interpreted by the shell.
$CMD = File::Basename::basename($0);

# Date when the output was generated, exactly as printed by date(1).
$gen_date = `date`; chomp $gen_date;

# Forward declaration, so that date2str can be called below before its
# definition has been seen.
sub date2str;

$VERSION = "1.23";
# Hand the yyyy/mm/dd part of the RCS keyword to date2str.
$DATE = date2str ('$Date: 2008/11/11 07:21:14 $' =~ m|(\d+/\d+/\d+)|);

# Scratch file; the -H and -T modes write their intermediate output here.
# NOTE(review): the name is predictable (command name + pid) -- consider
# File::Temp once all users of $tmp have been checked.
$tmp = "/tmp/$CMD.$$";
##################################################################
# check option and arguments
##################################################################
# Parse the command line into the $opt_X globals.
#   -D1: write each read line   -D2: write each word
# getopts() returns false when it meets an option that is not listed here.
# Its result used to be ignored, so a mistyped option only produced a
# warning and the translation ran anyway; now the run is aborted.
getopts('o:t:VhMHTLC:D:a:')
    || die "$CMD: Invalid command line option. Try \`$CMD -h'.\n";
# usage
#   Print the help text followed by the version line to STDOUT and
#   terminate the program with exit status 1.  Takes no arguments and
#   never returns.
#
#   The help text is a single-quoted here-document, i.e. nothing in it is
#   interpolated and a backslash is printed literally.  Quotes are therefore
#   written plainly (`name'); only LaTeX macro names such as \SP carry a
#   backslash.
sub usage
{
    print <<'END';
usage: latex2man [-t transfile] [-H | -T | -M | -L] [-C name] [-a char] [-h] [-V] infile outfile.
A tool to translate UNIX manual pages written with LaTeX into a format
understood by the UNIX man(1)-command.
Reads infile, writes outfile.
-t transfile: Translation for user defined LaTeX macros.
-M: Produce output suitable for the man(1) command (default).
-H: Instead of producing output suitable for the man(1) command,
HTML code is produced (despite the name of the command).
-T: Instead of producing output suitable for the man(1) command,
TEXINFO code is produced (despite the name of the command).
-L: Output the LaTeX source. Useful in conjunction with the -C
option.
-C name: Enable conditional text `name'.
To enable more than one conditional name use quotes:
-C 'name1 name2 ...'
The following names are defined automatically:
-H defines HTML
-T defines TEXI
-M defines MAN
-L defines LATEX
-a char: Is used only in conjunction with -T.
Background:
TEXINFO ignores all blanks before the first word on a
new line. In order to produce some additional space before
that word (using \SP) some character has to be printed
before the additional space. By default this is a . (dot).
The `char' specifies an alternative for that first character.
Giving a blank (-a" ") suppresses the indentation of a line.
Note: only for the first \SP of a series that char is printed.
-h: Help.
-V: Version.
Copyright (C) 1998 Dr. Jürgen Vollmer, Viktoriastr. 15, D-76133 Karlsruhe
email: Juergen.Vollmer@informatik-vollmer.de
License:
This program can be redistributed and/or modified under the terms
of the LaTeX Project Public License Distributed from CTAN
archives in directory macros/latex/base/lppl.txt; either
version 1 of the License, or any later version.
If you find this software useful, please send me a postcard from the place
where you are living.
END
    print " Version $VERSION, $DATE.\n";
    exit 1;
}
# -h: print the help text; usage does not return.
($opt_h) && usage;

# -V: report the version.  When -V is the only thing on the command line
# the job is done; formerly the script went on and died with the
# "Expected two arguments" error.  With file arguments the translation
# still runs after the version has been printed.
if ($opt_V) {
    print "Version: $VERSION, $DATE\n";
    exit 0 if (@ARGV == 0);
}

# check command line arguments: at most one output format may be selected
$opt_cnt = 0;
$opt_cnt++ if ($opt_H);
$opt_cnt++ if ($opt_T);
$opt_cnt++ if ($opt_M);
$opt_cnt++ if ($opt_L);
if ($opt_cnt == 0) {
    # if no option -H, -T, -M, -L is given, -M is the default
    $opt_M = 1;
}
die "$CMD: you may give only one of the -H -T -M -L options\n" if ($opt_cnt > 1);
(@ARGV == 2) || die "$CMD: Expected two arguments: infile outfile. Try \`$CMD -h'.\n";

# Names of the enabled conditional text sections (name => 1).
my %cond_name;
if (defined $opt_C) {
    my $name;
    # split ' ' skips leading blanks and treats a run of white space as one
    # separator.  The former split /\s/ yielded an empty name for every
    # additional blank, and the plain truth test ignored the name "0".
    foreach $name (split (' ', $opt_C)) {
        $cond_name{$name} = 1;
    }
}
# the selected output format defines its own conditional name
$cond_name{MAN}   = 1 if ($opt_M);
$cond_name{HTML}  = 1 if ($opt_H);
$cond_name{TEXI}  = 1 if ($opt_T);
$cond_name{LATEX} = 1 if ($opt_L);

$SrcFile  = $ARGV[0];
$DestFile = $ARGV[1];

# The two-argument form of open is kept because the script still declares
# compatibility with Perl 5.004.  The reason of a failure ($!) is now part
# of the message.
open (SRC, "<$SrcFile") || die "$CMD: Can't open file \`$SrcFile' for reading: $!\n";
if ($opt_H || $opt_T) {
    # DestFile will be written in the postprocess
    open (DEST, ">$tmp") || die "$CMD: Can't open file \`$tmp' for writing: $!\n";
} else {
    open (DEST, ">$DestFile") || die "$CMD: Can't open file \`$DestFile' for writing: $!\n";
}
########################################################################
# global state

# $Prefix is the name prefix from which procedure and variable names of
# the selected output format are constructed.  No prefix is assigned for -L.
if    ($opt_T) { $Prefix = "texi"; }
elsif ($opt_H) { $Prefix = "html"; }
elsif ($opt_M) { $Prefix = "man";  }

# --- output state ------------------------------------------------------
$texiCenterLine = 0;    # TEXI mode only: true while a line must be centered
$paragraph      = 0;    # true, if a paragraph marker was the last output
$newline        = 0;    # true, if a newline char was the last output
$first_word     = 1;    # true, if the next word to be processed is the
                        # first one of a new paragraph or after a line break

# --- itemize/enumerate/description environments ------------------------
$list_nest   = 0;       # nesting depth of the list environments
$cur_list[0] = "";      # kind of list per nesting level (index: list_nest);
                        # values are: 'enum' / 'descr' / 'item'
$item_nr[0]  = 0;       # number of \item seen per nesting level
                        # (index: list_nest)
$manRS       = 0;       # true, if for Man a .RS was given after a \item

# --- verbatim and table environments -----------------------------------
$inside_verb  = 0;      # true, if inside a verbatim environment
$inside_table = 0;      # true, if inside a table environment
$first_column = 0;      # true, if this is the first column in a table row
$columns      = 0;      # nr of columns in the current table

# --- counters ----------------------------------------------------------
$enum_nr     = 0;       # current number of an enumeration
$nesting     = 0;       # counts recursive calls of interpret_word
$section_cnt = 0;       # index into @sections
# @sections      array of all sections
# $section_kind  array of section kind (subsection/section)

# References to the translation tables of the selected output format:
# LaTeX macros without, with one and with two arguments ...
$Macro   = \%{"${Prefix}Macro"};
$Macro1a = \%{"${Prefix}Macro1a"};
$Macro1b = \%{"${Prefix}Macro1b"};
$Macro2a = \%{"${Prefix}Macro2a"};
$Macro2b = \%{"${Prefix}Macro2b"};
$Macro2c = \%{"${Prefix}Macro2c"};
# ... and the translations of special characters
$LetterCode = \%{"${Prefix}LetterCode"};

########################################################################
# forward declarations
sub interpret_word;
sub interpret_line;
sub Print;
sub PrintM;
sub NL;
# Translation for LaTeX macros for MAN
#
# The tables are filled from local lists.  Entries are added key by key,
# so anything already stored in the man* hashes is left alone.

# translation of special characters: each letter is passed through unchanged
foreach my $letter ('ä', 'ö', 'ü', 'Ä', 'Ö', 'Ü', 'ß') {
    $manLetterCode{$letter} = $letter;
}

# LaTeX macros without arguments
{
    my %plain = (
        'LaTeX'      => 'LaTeX',
        # LATEX is needed as well: \LaTeX may be contained in a section
        # name, and section names are transposed into uppercase.
        'LATEX'      => 'LaTeX',
        'itemsep'    => ' ',
        # some math
        'rightarrow' => '-->',
        'Rightarrow' => '==>',
        'leftarrow'  => '<--',
        'Leftarrow'  => '<==',
        'ge'         => '>=',
        'le'         => '<=',
        'Dollar'     => '$',
        'Bar'        => '|',
        'Bs'         => '\\\\',
        'Tilde'      => '~',
        'hline'      => '\n_',
        'noindent'   => '',
        'copyright'  => '(C)',
        'Dots'       => '\&...\n',
        'Circum'     => '^',
        'Lbr'        => '[',
        'Rbr'        => ']',
        'LBr'        => '{',
        'RBr'        => '}',
        'Percent'    => '%',
        'Bullet'     => '*',
        # line breaks: only the MAN specific one produces output here
        'TEXbr'      => '',
        'MANbr'      => '\n.br\n',
        'TEXIbr'     => '',
        'HTMLbr'     => '',
        'medskip'    => '\n',
        # hack hack: this works even on the beginning of a line
        'SP'         => '\fB \fP',
        # macros named by a non-letter
        '~'          => ' ',
        '|'          => '|',
        '<'          => '<',
        '>'          => '>',
        '<='         => '<=',
        '>='         => '>=',
        '='          => '=',
        '<>'         => '<>',
        '{'          => '{',
        '}'          => '}',
        '_'          => '_',
        '$'          => '$',
        '#'          => '#',
        '&'          => '&',
        '%'          => '%',
        '-'          => '',
        ','          => ' ',
        '\\'         => '\n.br',    # line break
        '\\Tab'      => '\nT}',     # end of column in a table environment
    );
    @manMacro{keys %plain} = values %plain;
    # the first \SP of a series is translated like any other one
    $manMacro{'SPfirst'} = $manMacro{'SP'};
}

# LaTeX macros with one argument:
#   name => [ text before the argument, text after the argument ]
{
    my %one_arg = (
        'emph'          => [ '\fI',       '\fP'  ],
        'textbf'        => [ '\fB',       '\fP'  ],
        'texttt'        => [ '',          ''     ],
        'verb'          => [ '',          ''     ],
        'underline'     => [ '\n.ul\n',   '\n'   ],
        'section'       => [ '\n.SH ',    '\n'   ],
        'subsection'    => [ '\n.SS ',    ''     ],
        'subsubsection' => [ '\n.SS ',    ''     ],
        'Prog'          => [ '',          ''     ],
        'File'          => [ '',          ''     ],
        'Opt'           => [ '\fB',       '\fP'  ],
        'oOpt'          => [ '[\fB',      '\fP]' ],
        'Arg'           => [ '\fI',       '\fP'  ],
        'oArg'          => [ '[\fI',      '\fP]' ],
        'Email'         => [ '\fB',       '\fP'  ],
        'URL'           => [ '\fB',       '\fP'  ],
    );
    foreach my $macro (keys %one_arg) {
        ($manMacro1a{$macro}, $manMacro1b{$macro}) = @{ $one_arg{$macro} };
    }
}

# LaTeX macros with two arguments:
#   name => [ text before the first argument,
#             text between the arguments,
#             text after the second argument ]
{
    my %two_args = (
        'Cmd'       => [ '\fI',  '\fP(',     ')'     ],
        'OptArg'    => [ '\fB',  '\fP\fI',   '\fP'   ],
        'OptoArg'   => [ '\fB',  '\fP[\fI',  '\fP]'  ],
        'oOptArg'   => [ '[\fB', '\fP\fI',   '\fP]'  ],
        'oOptoArg'  => [ '[\fB', '\fP[\fI',  '\fP]]' ],
        'setlength' => [ '',     '',         ''      ],
    );
    foreach my $macro (keys %two_args) {
        ($manMacro2a{$macro}, $manMacro2b{$macro}, $manMacro2c{$macro})
            = @{ $two_args{$macro} };
    }
}
########################################################################
# Translation for LaTeX macros for HTML
#
# The values of these tables are HTML source text.  Therefore the
# characters <, > and & are given as character references, and markup is
# given as real tags.
# NOTE(review): this assumes that Print emits the values without escaping
# them again (htmlVerb escapes its argument itself before it calls Print)
# -- confirm.

# translation of special characters
$htmlLetterCode{'ä'} = '&auml;';
$htmlLetterCode{'ö'} = '&ouml;';
$htmlLetterCode{'ü'} = '&uuml;';
$htmlLetterCode{'Ä'} = '&Auml;';
$htmlLetterCode{'Ö'} = '&Ouml;';
$htmlLetterCode{'Ü'} = '&Uuml;';
$htmlLetterCode{'ß'} = '&szlig;';

# LaTeX macros without arguments
$htmlMacro{'LaTeX'} = 'LaTeX';
$htmlMacro{'LATEX'} = 'LaTeX';  # needed, since \LaTeX may be contained in a
                                # section name (which are transposed
                                # into uppercase)
$htmlMacro{'itemsep'} = '';
# some math
$htmlMacro{'rightarrow'} = '--&gt;';
$htmlMacro{'Rightarrow'} = '==&gt;';
$htmlMacro{'leftarrow'} = '&lt;--';
$htmlMacro{'Leftarrow'} = '&lt;==';
$htmlMacro{'ge'} = '&gt;=';     # was a plain '>': the '=' had been lost
$htmlMacro{'le'} = '&lt;=';
$htmlMacro{'Dollar'} = '$';
$htmlMacro{'Bar'} = '|';
$htmlMacro{'Bs'} = '\\';
$htmlMacro{'Tilde'} = '~';
$htmlMacro{'hline'} = '';
$htmlMacro{'noindent'} = '';
$htmlMacro{'copyright'} = '&copy;';
$htmlMacro{'Dots'} = '...';
$htmlMacro{'Circum'} = '^';
$htmlMacro{'Lbr'} = '[';
$htmlMacro{'Rbr'} = ']';
$htmlMacro{'LBr'} = '{';
$htmlMacro{'RBr'} = '}';
$htmlMacro{'Percent'} = '%';
$htmlMacro{'Bullet'} = '*';
# line breaks: only the HTML specific one produces output here
$htmlMacro{'TEXbr'} = '';
$htmlMacro{'MANbr'} = '';
$htmlMacro{'TEXIbr'} = '';
$htmlMacro{'HTMLbr'} = '<br>\n';
$htmlMacro{'medskip'} = '<p>\n';
# a plain blank would be collapsed by the browser, so additional space
# has to be a no-break space
$htmlMacro{'SP'} = '&nbsp;';
$htmlMacro{'SPfirst'} = $htmlMacro{'SP'};
$htmlMacro{'~'} = '&nbsp;';
$htmlMacro{'|'} = '|';
$htmlMacro{'<'} = '&lt;';
$htmlMacro{'>'} = '&gt;';
$htmlMacro{'<='} = '&lt;=';
$htmlMacro{'>='} = '&gt;=';
$htmlMacro{'='} = '=';
$htmlMacro{'<>'} = '&lt;&gt;';
$htmlMacro{'{'} = '{';
$htmlMacro{'}'} = '}';
$htmlMacro{'_'} = '_';
$htmlMacro{'$'} = '$';
$htmlMacro{'#'} = '#';
$htmlMacro{'&'} = '&amp;';
$htmlMacro{'%'} = '%';
$htmlMacro{'-'} = '';
$htmlMacro{','} = ' ';
$htmlMacro{'\\'} = '<br>\n';    # line break
# NOTE(review): left unchanged.  As it stands the end of a column emits no
# closing cell or row tag; the right tags depend on what the htmlTable*
# subs print -- confirm and complete.
$htmlMacro{'\\Tab'} = '\n\n';   # end of column in a table environment

# LaTeX macros with one argument: 1a is printed before, 1b after the
# argument.  All of these used to be empty, i.e. the markup got lost.
$htmlMacro1a{'emph'} = '<em>';
$htmlMacro1b{'emph'} = '</em>';
$htmlMacro1a{'textbf'} = '<strong>';
$htmlMacro1b{'textbf'} = '</strong>';
$htmlMacro1a{'texttt'} = '<tt>';
$htmlMacro1b{'texttt'} = '</tt>';
$htmlMacro1a{'verb'} = '<tt>';
$htmlMacro1b{'verb'} = '</tt>';
$htmlMacro1a{'underline'} = '<u>';
$htmlMacro1b{'underline'} = '</u>';
$htmlMacro1a{'section'} = '\n
"; NL; $paragraph = 1; } } sub htmlVerb { $arg = $_[0]; $arg =~ s/&/&/g; $arg =~ s/>/>/g; $arg =~ s/</g; Print $arg; } sub htmlItemWithArg { my $arg = $_[0]; NL; if ($item_nr[$list_nest] > 1) { NL; Print ""; NL; } Print "
'; } sub htmlTableEnd { NL; Print " |
'; NL; } sub htmlVerbatimEnd { Print ''; NL; } sub htmlVerbatimLine { s/&/&/g; s/</g; s/>/>/g; print DEST "$_"; } ########################################################################### # processing for TexInfo sub texiStart { Print '\input texinfo @c -*-texinfo-*-'; NL; Print '@c %**start of header'; NL; Print '@setfilename ' . "$name.info"; NL; Print '@settitle ' . "$name"; NL; Print '@c %**end of header'; NL; Print '@c Manual page created with' ." $CMD on $gen_date>"; NL; Print '@c NOTE: This file is generated, DO NOT EDIT.'; NL; } sub texiEnd { Print '@bye'; NL; Print '@c NOTE: This file is generated, DO NOT EDIT.'; NL; } sub texiSection { my ($cnt, $kind, $section) = @_; if (uc $sections[$cnt-1] eq "SYNOPSIS") { Print '\n@@INSERTION-POINT@@-TOC@@\n'; $sections[$cnt-1] = "Top"; # The predecessor node is Top and not SYNOPSIS } if (uc $sections[$cnt] eq "SYNOPSIS") { $cnt == 1 || die "$CMD: The Synopsis section must be the first section after\n" . "\t the Name environment\n"; } else { Print '\n@@INSERTION-POINT@@-TEXI-SEC@@' . " $kind $cnt" . '\n'; } interpret_line "\\$kind\{$section\}"; } sub texiNameStart { my ($name, $chapter, $author, $tool) = @_; $sections[0] = "Top"; # Print '@dircategory ' .$tool; NL; Print '@dircategory Man-pages'; NL; Print '@direntry'; NL; Print "* " . (ucfirst $name) . ": ($name). Its Man-Page "; NL; Print '@end direntry'; NL; Print '@titlepage'; NL; Print '@title ' . "$name"; NL; Print '@subtitle ' . "$tool"; NL; Print '@author ' . "$author"; NL; Print '@end titlepage'; NL; Print '\n@@INSERTION-POINT@@-TEXI-TOP@@'; NL; Print '@top ' . 
"$name"; NL; } sub texiNameEnd { # nothing } sub texiParagraph { if (!$paragraph) { NL; print DEST "\n"; $paragraph = 1; } } sub texiVerb { $arg = $_[0]; $arg =~ s/({|})/\@$1/g; Print $arg; } sub texiItemWithArg { my $arg = $_[0]; Print '\n@item '; interpret_word $arg; NL; } sub texiItem { Print '\n@item\n'; } sub texiDescriptionStart { Print '\n@table @samp\n'; } sub texiDescriptionEnd { Print '\n@end table\n'; } sub texiItemStart { Print '\n@itemize @bullet\n'; } sub texiItemEnd { Print '\n@end itemize\n'; } sub texiCenterStart { $texiCenterLine = 1; $newline = 0; $texiMacro{'\\'} = '@*'; # line break $texiMacro{'TEXIbr'} = '@*'; NL; } sub texiCenterEnd { $texiCenterLine = 0; $newline = 0; $texiMacro{'\\'} = '@*\n'; # line break $texiMacro{'TEXIbr'} = '@*\n'; NL; } sub texiEnumStart { Print '\n@enumerate\n'; } sub texiEnumEnd { Print '\n@end enumerate\n'; } sub texiTableStart { my $columns = $_[0]; my $width = $_[1]; my $i; Print '\n@multitable @columnfractions '; for ($i = 1; $i <= $columns; $i++) { Print " " .0.9/$columns ; } Print '\n'; } sub texiTableSep { Print '@tab '; } sub texiTableEnd { Print '\n@end multitable\n'; } sub texiVerbatimStart { NL; Print '@*'; NL } sub texiVerbatimEnd { NL; } sub texiVerbatimLine { s/({|}|@| )/@\1/g; chop; print DEST ".$_\@*\n"; } ########################################################################### ########################################################################### # general processing # emit an error message is the given macro does not exists. 
sub check_Macro { exists $Macro->{$_[0]} || die "Error in line $.: no such macro: \\$_[0]\n"; } sub check_Macro1 { (exists $Macro1a->{$_[0]} && exists $Macro1b->{$_[0]}) || die "$CMD: Error in line $.: no such macro: \\$_[0]\n"; } sub check_Macro2 { (exists $Macro2a->{$_[0]} && exists $Macro2b->{$_[0]} && exists $Macro2c->{$_[0]}) || die "$CMD: Error in line $.: no such macro: \\$_[0]\n"; } sub NL { if (!$newline) { printf DEST "\n"; if ($texiCenterLine) { print DEST "\@center "; } $newline = 1; } } sub interpret_word { if (@_ <= 0) { return; } $_ = join " ", @_; my ($s,$m,$a1,$a2,$r); # start, match/macro, argument1, argument2 my $add_blank = 1; # if true, add a blank after the word if ($opt_D == 2) { if ($nesting == 0) { print "**** "; } else { print " "; } print "\`$_'\n"; } if ($opt_H) { # handling of HTML table rows if ($inside_table == 1) { if ($first_column == 1) { if (/^$/) { return; } if (/^\\hline/) { Print '\n