{$H+}
Program corridx;
(*
This makes corrections in a *.idx
file created by Latex as a raw index

in Latex we have these commands for making the index:

\begin{verbatim}
    % acronyms 2 Entries
    \newcommand{\ia}[2]{#1 (#2)\index{acr #2@#2!#1}}
    % chemical 1 Entry
    \newcommand{\ic}[1]{#1\index{chem #1@#1}}
    % general 1 Entry
    \newcommand{\ig}[1]{#1\index{gen #1@#1}}
    Example: \ia{Polyalkylene oxide}{PAO}

\end{verbatim}


To start the program 

with a command line parameter:

in winedt use as command line

c:\mypathto\corridx.exe "%P\%N.idx"
  %P     Input File's Path
  %N     Input File's Name

                             
Better put in your Windet directory and use:
to be clarified work out how it works really

% Exe('%B\Exec\MiKTeX\corridx.exe');                                            
% 
% Exe('%B\Exec\MiKTeX\TeX.edt');                                            

*)


const

cifn='in.txt'; 
cofn='out.txt';
ctfn='idx.tmp';
clfn='idx.log';


var
ifn,ofn,tfn,lfn: shortstring;
s,t,l: text;
str: string;
LineNumber : word;


Procedure Message(str: shortstring);
begin
    if str='' then
    begin
        Writeln;
        Writeln(l, str);
    end
    else
    begin
        Writeln('[',linenumber,'] ',str);
        Writeln(l, '[',linenumber,'] ',str);
    end;
end;

Procedure copytxt(str, tfn: shortstring);
var
    s,t: text;
    c: char;
begin
    Message('Copy '+ str + ' to '+ tfn);
    Assign(s,str);
    Assign(t,tfn);
    reset(s);
    rewrite(t);
    while not eof(s) do
    begin
        read(s,c);
        write(t,c);
    end;
    close(s);
    close(t);
end;





Procedure init;
begin
    LineNumber:=0; ;
    ifn:=cifn;
    ofn:=cofn;
    tfn:=ctfn;
    lfn:=clfn;
if paramcount=1 then 
begin
    ifn:=paramstr(1);
    tfn:=paramstr(1)+'.tmp';
    lfn:=paramstr(1)+'.log';
    ofn:=ifn;
end;
    Assign(l,lfn);
    rewrite(l);
    Message('This is corridx');
    Message('Preprocessor for the Latex File *.idx');
    Message('');
    Message('Program Info '+ paramstr(0));

    Message('Input File is '+ ifn);
    Message('Output File is '+ ofn);
    Message('Temp File is '+ tfn);
    Copytxt(ifn,tfn);
    Assign(s,tfn);
    Assign(t,ofn);
    reset(s);
    rewrite(t);
end;

Procedure finish;
begin

    close(s);
    close(t);

    Message('Finished corridx'); 
    close(l);
    if paramcount <> 1 then readln(input);

end;


Procedure Error(str: string);
begin

    close(s);
    close(t);

    Message('Error corridx'); 
    Message(str);
    Message('restoring input file');
    Copytxt(tfn,ifn);
    close(l);
    readln(input);
    halt;

end;


Procedure RepAll(var Mainstr: string; Findstr, Replacestr: string);
var
p: longint;
begin
p:=pos(Findstr,Mainstr); 
while  p>0 do
    begin
        delete(Mainstr,p,length(findstr));
        Insert(Replacestr, Mainstr,p);
        p:=pos(Findstr,Mainstr); 
    end;
end;



Function Pagestr(var str: string):string;
(*
    Here we extract the page number enclosed in curled brackets
    \indexentry{acr TDI@TDI!toluene diisocyanate }{49}
*)

var
s: string;
i, p: longint;
 
begin
    s:='{0}'; {Makeindex wants a valid pagenumber}
    if pos('\section',str)>0 then 
    begin
        Pagestr:=s;
        Message('an index subheading');
        exit;
    end;
    p:= pos('}{', str);
    if p>0 then 
    begin
    s:='';
    for i:= p+1 to length(str) do s:=s+str[i];
    end
    else Error('no page string');
    if s= '{}' then Error('invalid page string');
    Pagestr:=s;
end;


Function Sortstr(var str: string):string;
(*
    Here we extract what is behind 
    '\indexentry{' up to '@'
    \indexentry{acr TDI@TDI!toluene diisocyanate }{49}
*)

var
s: string;
i, p,q: longint;
 
begin
    
    s:='';
    p:= length('\indexentry{');
    q:= pos('@', str);
    if (p>0) and (q>0) then 
    begin
        inc(p);
        dec(q);
        for i:=  p to q do s:=s+str[i];
    end;
    Sortstr:=s;
end;


Function BeforeSubentry(var str: string):string;
(*
    Here we extract what is behind 
    '\indexentry{' up to '!'
    \indexentry{acr TDI@TDI!toluene diisocyanate }{49}
*)

var
s: string;
i, p,q: longint;
 
begin
   if pos('!',str)=0 then Error('no subindex');
   s:='';

    p:= length('\indexentry{');
    q:= pos('!', str);
    if (p>0) and (q>0) then 
    begin
        inc(p);
        dec(q);
        for i:=  p to q do s:=s+str[i];
    end;
    BeforeSubentry:=s;
end;

Function AfterSubentry(var str: string):string;
(*
    Here we extract what is behind 
    '!'up to '}{'
    \indexentry{acr TDI@TDI!toluene diisocyanate }{49}
*)

var
s: string;
i, p,q: longint;
 
begin
    if pos('!',str)=0 then Error('no subindex');
    s:='';
    p:= pos('!', str);
    q:= pos('}{', str);
    if (p>0) and (q>0) then 
    begin
        inc(p);
        dec(q);
        for i:=  p to q do s:=s+str[i];
    end;
    AfterSubentry:=s;
end;




Function indexstr(var str: string):string;
(*
    Here we extract what is behind 
    '@'up to '}{'
    \indexentry{acr TDI@TDI!toluene diisocyanate }{49}
*)

var
s: string;
i, p,q: longint;
 
begin
    s:='';
    p:= pos('@', str);
    q:= pos('}{', str);
    if (p>0) and (q>0) then 
    begin
        inc(p);
        dec(q);
        for i:=  p to q do s:=s+str[i];
    end;
    Indexstr:=s;
end;


Function Subheading(var str: string):boolean;
var 
b: boolean;
s: string;
begin

    b:= (pos('\section',str)>0) or
        (pos('\subsection',str)>0) or
        (pos('\subsubsection',str)>0) or
        (pos('\paragraph',str)>0);
    subheading:=b;
    if b then 
        begin
        s:='\indexentry{'+Sortstr(str)+'@'+indexstr(str) +'}'+Pagestr(str);    
        str:=s;
            
        
        end;
end;




Procedure CleanMath(var str: string);
var
    b: boolean;
    i: longint;
begin
    b:=false;
    for i:=1 to length(str) do
    begin
    if str[i] = '$' then b:= not b; {toggle b if a $ is in str}
    if b then str[i]:= ' ';
    end;    

end;

Procedure CleanSoftHyphen(var str: string);
begin
    RepAll(str,'\-','');
end;


Procedure CleanNonLetter(var str: string);
var
    i: longint;
begin
    for i:=1 to length(str) do
    begin
    if str[i] in ['A'..'Z', 'a'..'z','~'] then else str[i]:= ' ';
    end;    

end;


Procedure CleanSingleLetter(var str: string);
var
    i: longint;
begin
    if str[2]=' ' then str[1]:=' '; {remove a leading non space}
    for i:=2 to length(str)-1 do
    begin
    if (str[i-1] = ' ') and (str[i+1] = ' ')   then str[i]:= ' ';
    end;    

end;

Procedure CleanPrefix(var str: string);
begin
{I want to clean some prefixes}
    str:=lowercase(str);
    Repall(str,
           'tert ', 
           '     ');

    Repall(str,
           'sec ', 
           '    ');

    Repall(str,
           'cis ', 
           '    ');

    Repall(str,
           'trans ', 
           '      ');
    Repall(str,
           'syn ', 
           '    ');

    Repall(str,
           'anti ', 
           '     ');


    Repall(str,
           'exo ', 
           '    ');

    Repall(str,
           'endo ', 
           '     ');

    Repall(str,
           'cyclo ', 
           '      ');

    Repall(str,
           'spiro ', 
           '      ');

end;


Procedure CleanChem(var str: string);
begin
    CleanMath(str);
    CleanNonLetter(str);
    CleanSingleLetter(str);
    CleanPrefix(str);
end;


Procedure SetFirstCap(var istr,sstr: string); {Indexstring and Sortstring}
var
s: string;
i: longint;
begin
    s:=istr;
    CleanChem(s);
    sstr:=s;
    repall(sstr,' ','');
    sstr:=lowercase(sstr);
    for i:= 1 to length(s) do if s[i] <> ' ' then 
        begin
            istr[i]:= upcase(istr[i]); 
            exit;
        end;

end;

Procedure Corracr(var str: string);
var
    pstr, istr, sstr: string;
begin
    if pos('\indexentry{acr',str)=1 then 
    begin
        istr:=AfterSubentry(str);
        pstr:=pagestr(str);
        Setfirstcap(istr, sstr); {We do not use the sortstring here}
        sstr:=Beforesubentry(str);
        str:= '\indexentry{'+sstr+'!' + istr + '}' + pstr;
        
    end;
end;


Procedure Corrchem(var str: string);
var
    pstr, istr, sstr: string;
begin
    if pos('\indexentry{chem',str)=1 then 
    begin
        istr:=Indexstr(str);
        pstr:=pagestr(str);
        Setfirstcap(istr, sstr);

        str:= '\indexentry{chem '+sstr+'@' + istr + '}' + pstr;
        
    end;
end;


Procedure Corrgen(var str: string); 
var
    i,p: longint;
begin

    if pos('\indexentry{gen',str)=1 then 
    begin
        if pos('!',str) >0 then message('A subentry in general index');
        p:=pos('@',str);
        if p > 0 then 
        begin 
        {We look only to make small case before '@'}
        for i:= 13 to p do str[i]:= lowercase(str[i]);
        {We look only to capitalize the first letter after '@'}
        for i:= p to length(str) do if str[i] in ['A'..'Z', 'a'..'z'] then 
                begin
                str[i] :=Upcase(str[i]); exit;
                end;    
        end;
    end;
end;



begin

    init;
    Message('start processing');
    Message('from '+tfn+' into '+ofn);
    while not eof(s)
    do
    begin
        readln(s,str);
        inc(LineNumber);
        if pos('}{',str)=0 then Error('no pagestring');
        if pos('@',str)=0 then Error('no sortstring');
        CleanSoftHyphen(str);
        if not subheading(str) then
            begin
                corracr(str);
                corrchem(str);
                corrgen(str);
            end;    
            writeln(t,str);
    end;
    
    finish;


end.