 * splitindex.c
 * Copyright (c) Markus Kohm, 2002
 * $Id: splitindex.c,v 1.3 2009-03-20 14:55:11 mjk Exp $
 * This file is part of the SplitIndex bundle.
 * This work may be distributed and/or modified under the conditions of
 * the LaTeX Project Public License, version 1.3c of the license.
 * The latest version of this license is in
 *   http://www.latex-project.org/lppl.txt
 * and version 1.3c or later is part of all distributions of LaTeX
 * version 2005/12/01 or later and of this work.
 * This work has the LPPL maintenance status "author-maintained".
 * The Current Maintainer and author of this work is Markus Kohm.
 * The list of all files belonging to the SplitIndex bundle is given
 * in the file `manifest.txt'. Files generated by means of unpacking the
 * distribution (using, for example, the docstrip program) or by means
 * of compiling them from a source file, for example, from splitindex.c
 * or splitindex.java may be distributed at the distributor's discretion.
 * However if they are distributed then a copy of the SplitIndex bundle
 * must be distributed together with them.
 * The list of derived (unpacked or compiled) files belonging to the
 * distribution and covered by LPPL is defined by the unpacking scripts 
 * (with extension .ins) and the installation script (with name 
 * install.sh) which are part of the distribution.
 * Two often ignored clauses from LPPL 1.3c you should not ignore:
 * ----------------------------------------------------------------
 * 2. You may distribute a complete, unmodified copy of the Work as you
 *    received it.  Distribution of only part of the Work is considered
 *    modification of the Work, and no right to distribute such a Derived
 *    Work may be assumed under the terms of this clause.
 * 3. You may distribute a Compiled Work that has been generated from a
 *    complete, unmodified copy of the Work as distributed under Clause 2
 *    above, as long as that Compiled Work is distributed in such a way that
 *    the recipients may install the Compiled Work on their system exactly
 *    as it would have been installed if they generated a Compiled Work
 *    directly from the Work.

#include <sys/types.h>
#include <sys/wait.h>

#include <ctype.h>
#include <errno.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Option codes returned by getopt()/getopt_long().  Each code is the
 * character of the matching short option, so the same constants serve
 * in the short option string, the long option table and the switch in
 * ScanArguments().
 */
enum {
    OPT_HELP      = 'h',
    OPT_MAKEINDEX = 'm',
    OPT_IDENTIFY  = 'i',
    OPT_RESULTIS  = 'r',
    OPT_SUFFIXIS  = 's',
    OPT_VERBOSE   = 'v',
    OPT_VERSION   = 'V'
};

/*
 * Long option support.  On OpenBSD and FreeBSD, or when NO_LONGOPT is
 * defined, only short options are compiled in (HAS_LONGOPT is 0);
 * otherwise <getopt.h> is used and the long option table below is
 * defined.  HAPAR is a help-text separator string that differs between
 * the two configurations.
 */
#if defined(__OpenBSD__) || defined(__FreeBSD__) || defined(NO_LONGOPT)

#define HAS_LONGOPT 0
#define HAPAR       ""

#else /* defined(__OpenBSD__) || defined(__FreeBSD__) || defined(NO_LONGOPT) */

#define HAS_LONGOPT 1
#define HAPAR       "\n\t\t"

#include <getopt.h>

/* Every long option maps onto the code of its short equivalent. */
static const struct option long_options[] = {
    { "help",      no_argument,       NULL, OPT_HELP },
    { "makeindex", required_argument, NULL, OPT_MAKEINDEX },
    { "identify",  required_argument, NULL, OPT_IDENTIFY },
    { "resultis",  required_argument, NULL, OPT_RESULTIS },
    { "suffixis",  required_argument, NULL, OPT_SUFFIXIS },
    { "verbose",   no_argument,       NULL, OPT_VERBOSE },
    { "version",   no_argument,       NULL, OPT_VERSION },
    { NULL, 0, NULL, 0 }              /* end-of-table marker */
};

#endif /* defined(__OpenBSD__) || defined(__FreeBSD__) || defined(NO_LONGOPT) */


static const char short_options[] = {
    OPT_IDENTIFY, ':',
    OPT_RESULTIS, ':',
    OPT_SUFFIXIS, ':',

/* Name used as prefix for perror() diagnostics; replaced by argv[0]. */
char *prgname = "splitindex";

/* Verbosity level; higher values enable more diagnostic output. */
int Verbose = 0;

/* Program that is executed once for every generated raw index file. */
char *MakeIndex = "makeindex";

/* Extended regular expression used to recognise index entries. */
char *Identify = "^(\\\\indexentry)\\[([^]]*)\\](.*)$";

/* Templates expanded by regsub(): the output line and the file suffix. */
char *ResultIs = "$1$3";
char *SuffixIs = "-$2";

/* Raw index file to read and its name without a trailing ".idx". */
char *IDX = NULL;
char *Jobname = NULL;

/* Command-line arguments that follow the raw index file name. */
char *const *MakeIndexArgs = NULL;
int MakeIndexArgc = 0;

/* Print program name, version and copyright notice to stdout. */
static void show_version( void ) {
    fputs( "splitindex 0.2a\n"
           "Copyright (c) 2002 Markus Kohm <kohm@gmx.de>\n", stdout );
}

/*
 * Write the one-line usage synopsis to the given stream.
 *
 * out: stream to write to (stdout for help, stderr for errors).
 */
static void show_usage( FILE *out ) {
    fputs( "Usage: splitindex [OPTION]... RAWINDEXFILE [MAKEINDEXOPTION]...\n",
           out );
}

/*
 * Report a command-line usage error and terminate with exit status 1.
 *
 * format: printf-style message written to stderr, or NULL when the
 *         caller has nothing to add (e.g. getopt already printed its
 *         own diagnostic).
 * ...:    arguments for the format.
 *
 * Never returns.
 */
static void usage_error( const char *format, ... ) {
    if ( format != NULL ) {
        va_list ap;
        va_start( ap, format );
        vfprintf( stderr, format, ap );
        va_end( ap );
    }
    /* Point the user at the help option that exists in this build. */
    fputs( "Try `splitindex "
#if HAS_LONGOPT
           "--help"
#else
           "-h"
#endif
           "' for more information.\n", stderr );
    exit( 1 );
}

static void show_help( void ) {
    printf( "\n" );
    show_usage( stdout );
	"Split a single raw index file into multiple raw index files.\n"
        "Example: splitindex.pl foo.idx.\n"
        "  -h"
        ", --help    "
        "            "
        "\tshow this help and terminate\n"
        "  -m"
	", --makeindex"
        "\tcall PROGNAME instead of default `%s'.\n", 
        "  -i"
	", --identify"
        "\tuse regular EXPRESSION to match entries\n"
        "\t\t\t(see also option --resultis and --suffixis).\n"
        "\t\t\tDefault is `%s'.\n", 
        "  -r"
	", --resultis"
        "\tcreate line to be written from PATTERN after matching\n"
        "\t\t\tlines (see also option --identify).\n"
        "\t\t\tDefault is `%s'.\n",
        "  -s"
	", --suffixis"
        "\tcreate suffix to be used from PATTERN after matching\n"
        "\t\t\tlines (see also option --identify).\n"
        "\t\t\tDefault is `%s'.\n",
        "  -v"
	", --verbose "
	"            "
        "\tbe more verbose\n"
        "\t\t\t(can be used multiple to increase verbosity)\n"
        "  -V"
	", --version "
        "            "
        "\tshow version and terminate\n"

static void ScanArguments( int argc, char * const argv[] ) {
    int retVal;
    char *hs;
    if ( argc > 0 )
	prgname = argv[0];
    while ( ( retVal = getopt_long( argc, argv, short_options, 
				    long_options, NULL ) ) != - 1 ) {
    while ( ( retVal = getopt( argc, argv, short_options ) ) != - 1 ) {
	switch( retVal ) {
	    case OPT_HELP:
		break; /* should never be reached */
	    case OPT_VERSION:
		exit( 0 );
		break; /* should never be reached */
	    case OPT_MAKEINDEX:
		MakeIndex = optarg;
	    case OPT_IDENTIFY:
		Identify = optarg;
	    case OPT_RESULTIS:
		ResultIs = optarg;
	    case OPT_SUFFIXIS:
		SuffixIs = optarg;
	    case OPT_VERBOSE:
	    case ':':
	    case '?':
		usage_error( NULL );
		break; /* should never be reached */
    if ( optind < argc ) {
	IDX = argv[optind++];
	MakeIndexArgc = argc - optind;
	MakeIndexArgs = argv + optind;
    } else {
	usage_error( "missing raw index file\n" );
	return; /* should never be reached */

    if ( ( Jobname = strdup( IDX ) ) == NULL ) {
	perror( prgname );
        exit( errno );

    if ( ( ( hs = strrchr( Jobname, '.' ) ) != NULL ) 
	 && !strcmp( hs, ".idx" ) )
	*hs = 0;

/*
 * Expand a replacement template using the groups of a regex match.
 *
 * string:  the text the match offsets in pmatch refer to.
 * replace: the template.  "$0".."$9" insert the text of the matching
 *          group (nothing if the group did not take part in the match,
 *          is empty, or is not below nmatch); a '$' not followed by a
 *          digit is copied unchanged.  "\b", "\f", "\n", "\r" and "\t"
 *          produce the usual control characters, a backslash before any
 *          other character produces that character, and a trailing lone
 *          backslash is kept as a backslash.
 * nmatch:  number of valid elements in pmatch.
 * pmatch:  match offsets as filled in by regexec().
 *
 * Returns a newly allocated, NUL-terminated string which the caller
 * must free(), or NULL if the memory could not be allocated.
 */
static char *regsub( const char *string, const char *replace,
                     int nmatch, regmatch_t pmatch[] ) {
    char *retVal = NULL;
    size_t size = 0;
    int pass;

    /* Pass 0 only measures the result, pass 1 writes it. */
    for ( pass = 0; pass < 2; pass++ ) {
        char *ts = retVal;
        const char *hs;

        for ( hs = replace; *hs != '\0'; hs++ ) {
            if ( *hs == '$' && isdigit( (unsigned char)hs[1] ) ) {
                int n = *++hs - '0';

                if ( n < nmatch
                     && pmatch[n].rm_so >= 0
                     && pmatch[n].rm_so < pmatch[n].rm_eo ) {
                    /* group n took part in the match and is not empty */
                    size_t len = (size_t)( pmatch[n].rm_eo
                                           - pmatch[n].rm_so );
                    if ( pass ) {
                        memcpy( ts, string + pmatch[n].rm_so, len );
                        ts += len;
                    } else {
                        size += len;
                    }
                }
            } else {
                /* ordinary character, lone '$' or backslash escape:
                 * exactly one character of output */
                char c = *hs;

                if ( c == '\\' ) {
                    switch ( c = *++hs ) {
                        case '\0':
                            /* trailing backslash: keep it and step back
                             * so that the loop sees the terminator */
                            c = '\\';
                            hs--;
                            break;
                        case 'b': c = '\b'; break;
                        case 'f': c = '\f'; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;
                        default:  break; /* escaped character itself */
                    }
                }
                if ( pass ) {
                    *ts++ = c;
                } else {
                    size++;
                }
            }
        }

        if ( pass ) {
            *ts = '\0';
        } else if ( ( retVal = malloc( size + 1 ) ) == NULL ) {
            return NULL;
        }
    }

    return retVal;
}

/*
 * One generated raw index file: its heap-allocated file name and the
 * stream opened for it.  Entries are chained through next.
 */
struct list {
    struct list *next;
    char *name;
    FILE *file;
};

/* Head of the list of all raw index files created so far. */
static struct list *IDXfiles = NULL;

static FILE *findsuffix( const char *name ) {
    struct list *run;
    for ( run = IDXfiles; 
	  run != NULL;
	  run = run->next ) {
	const char *suffix = strchr( run->name, '\0' );
	suffix -= 4; /* .idx */
	suffix -= strlen( name );
	if ( ( suffix >= run->name ) 
	     && !strncmp( suffix, name, strlen( name ) ) ) {
	    return run->file;
    return NULL;

/*
 * Put a new raw index file at the front of the IDXfiles list.
 *
 * name: file name; the list keeps this pointer (it is not copied).
 * file: stream opened for that file.
 *
 * Returns the new list head.  Terminates the program with status 1 if
 * the list node cannot be allocated.
 */
static struct list *addentry( char *name, FILE *file ) {
    struct list *node = malloc( sizeof *node );

    if ( node == NULL ) {
        perror( prgname );
        exit( 1 );
    }
    node->name = name;
    node->file = file;
    node->next = IDXfiles; /* NULL for the very first entry */
    IDXfiles = node;
    return IDXfiles;
}

static FILE *newentry( const char *suffix ) {
    char *name;
    FILE *file;

    if ( ( name = malloc( strlen( Jobname ) + strlen( suffix ) + 5 ) ) == NULL ) {
	perror( prgname );
	exit( 1 );

    strcpy( name, Jobname );
    strcat( name, suffix );
    strcat( name, ".idx" );
    if ( ( file = fopen( name, "w" ) ) == NULL ) {
	perror( name );
	exit( errno );

    if ( Verbose > 1 )
	printf( "New index file %s\n", name );
    addentry( name, file );
    return file;

/*
 * Close the streams of all generated raw index files.  The list itself
 * is left untouched.  Terminates the program with status 1 as soon as
 * one fclose() fails, because the file may then be incomplete.
 */
static void CloseAllIDX( void ) {
    struct list *run;

    for ( run = IDXfiles; run != NULL; run = run->next ) {
        if ( fclose( run->file ) ) {
            perror( run->name );
            exit( 1 );
        }
    }
}

/*
 * Run the MakeIndex program once for every generated raw index file.
 *
 * Each call gets the argument vector
 *     MakeIndex, MakeIndexArgs[0..MakeIndexArgc-1], <file name>, NULL
 * and is waited for before the next one is started.  The entries of
 * IDXfiles (names and nodes) are freed while walking the list, so the
 * list is empty afterwards.
 *
 * Terminates the program with status 1 if memory cannot be allocated,
 * fork() or waitpid() fails, or the child did not terminate normally.
 * The exit status of a normally terminated child is not examined.
 */
static void CallMakeIndex( void ) {
    char **argv;
    int idx;
    struct list *run, *next;

    /* program name + options + one file name + terminating NULL */
    argv = calloc( (size_t)MakeIndexArgc + 3, sizeof *argv );
    if ( argv == NULL ) {
        perror( prgname );
        exit( 1 );
    }

    argv[0] = MakeIndex;
    for ( idx = 0; idx < MakeIndexArgc; idx++ )
        argv[idx + 1] = MakeIndexArgs[idx];

    for ( run = IDXfiles; run != NULL; run = next ) {
        pid_t fret;
        int status;

        /* slot directly behind the last option */
        argv[MakeIndexArgc + 1] = run->name;

        /* do not let the child inherit unwritten stdio buffers */
        fflush( NULL );

        if ( ( fret = fork() ) == 0 ) {
            /* This is the child */
            if ( Verbose > 1 ) {
                int i;
                for ( i = 0; argv[i] != NULL; i++ )
                    printf( "\"%s\" ", argv[i] );
                printf( "\n" );
                /* exec does not flush stdio */
                fflush( stdout );
            }
            execvp( MakeIndex, argv );
            /* only reached if execvp() failed */
            perror( MakeIndex );
            _exit( 1 );
        } else if ( fret == -1 ) {
            perror( prgname );
            exit( 1 );
        } else {
            /* Parent */
            while ( waitpid( fret, &status, 0 ) == -1 ) {
                if ( errno != EINTR ) {
                    perror( prgname );
                    exit( 1 );
                }
            }
            if ( ! WIFEXITED( status ) ) {
                fprintf( stderr, "%s terminated abnormally!\n", MakeIndex );
                exit( 1 );
            }
        }

        next = run->next;
        free( run->name );
        free( run );
    }

    IDXfiles = NULL; /* every node has been freed */
    free( argv );
}


/*
 * Read the raw index file and distribute its lines over the split raw
 * index files.
 *
 * The file IDX is opened for reading; if that fails and IDX carries no
 * ".idx" extension (IDX equals Jobname), IDX is replaced by
 * Jobname + ".idx" and opened again.  Each line is read completely (the
 * buffer grows as needed), stripped of its newline and matched against
 * the Identify expression:
 *   - on a match the output line is built from ResultIs and the file
 *     suffix from SuffixIs;
 *   - otherwise the line is written unchanged and the suffix is built
 *     from SuffixIs with every group standing for the text "idx".
 * The line is appended to the file of that suffix, which is created on
 * first use.
 *
 * Terminates the program on an invalid expression, unreadable input,
 * read errors or memory exhaustion.
 */
static void ProcessIDXFile( void ) {
    FILE *fIDX;
    char *line;
    regex_t preg;
    int retVal;
    int buffersize = 2050;
    int readfailed, readerr;
    regmatch_t pmatch[10];

    if ( ( line = malloc( buffersize ) ) == NULL ) {
        int err = errno; /* perror() may modify errno */
        perror( prgname );
        exit( err );
    }

    if ( ( retVal = regcomp( &preg, Identify, REG_EXTENDED ) ) != 0 ) {
        /* the line buffer doubles as buffer for the error text */
        regerror( retVal, &preg, line, buffersize );
        fprintf( stderr, "Error at identify: %s\n", line );
        free( line );
        exit( 1 );
    }

    if ( ( fIDX = fopen( IDX, "r" ) ) == NULL ) {
        if ( !strcmp( IDX, Jobname ) ) {
            /* name was given without extension: try again with ".idx" */
            if ( ( IDX = malloc( strlen( Jobname ) + 5 ) ) == NULL ) {
                int err = errno;
                perror( prgname );
                regfree( &preg );
                free( line );
                exit( err );
            }
            strcpy( IDX, Jobname );
            strcat( IDX, ".idx" );
            if ( ( fIDX = fopen( IDX, "r" ) ) == NULL ) {
                fprintf( stderr,
                         "Can read neither file %s nor file %s\n",
                         Jobname, IDX );
                regfree( &preg );
                free( line );
                exit( 1 );
            }
        } else {
            fprintf( stderr,
                     "Can't read file %s\n", IDX );
            regfree( &preg );
            free( line );
            exit( 1 );
        }
    }

    while ( fgets( line, buffersize, fIDX ) != NULL ) {
        char *hs;
        char *result, *suffix;
        FILE *f;

        /* no newline and not at end of file: the line did not fit */
        while ( ( strchr( line, '\n' ) == NULL ) && ! feof( fIDX ) ) {
            size_t used = strlen( line );
            char *grown = realloc( line, 2 * (size_t)buffersize );

            if ( grown == NULL ) {
                int err = errno;
                perror( prgname );
                exit( err );
            }
            line = grown;
            buffersize *= 2;
            /* continue reading right on top of the terminating NUL */
            if ( fgets( line + used, buffersize - (int)used, fIDX ) == NULL )
                break;
        }
        if ( ferror( fIDX ) )
            break;

        if ( ( hs = strchr( line, '\n' ) ) != NULL ) /* remove newline */
            *hs = 0;

        if ( ( retVal = regexec( &preg, line, 10, pmatch, 0 ) ) == 0 ) {
            result = regsub( line, ResultIs, 10, pmatch );
            suffix = regsub( line, SuffixIs, 10, pmatch );
        } else {
            int i;
            /* not an identified entry: keep the line as it is and let
             * every group of the suffix template expand to "idx" */
            result = strdup( line );
            for ( i = 0; i < 10; i++ ) {
                pmatch[i].rm_so = 0;
                pmatch[i].rm_eo = 3;
            }
            suffix = regsub( "idx", SuffixIs, 10, pmatch );
        }
        if ( !result || !suffix ) {
            perror( prgname );
            exit ( 1 );
        }

        if ( ( f = findsuffix( suffix ) ) == NULL )
            f = newentry( suffix );
        fprintf( f, "%s\n", result );

        free( suffix );
        free( result );
    }

    /* remember the error before the cleanup calls can change errno */
    readfailed = ferror( fIDX );
    readerr = errno;

    regfree( &preg );
    free( line );
    if ( readfailed ) {
        errno = readerr;
        perror( IDX );
        fclose( fIDX );
        exit( readerr != 0 ? readerr : 1 );
    }
    fclose( fIDX );
}



/*
 * Program entry point: parse the command line, split the raw index
 * file, close the generated files and run the MakeIndex program on each
 * of them.
 *
 * With a verbosity level above 9 the effective configuration is printed
 * first.  Returns 0; all error paths terminate the program inside the
 * called functions.
 */
int main( int argc, char * const argv[] ) {
    int retVal = 0;

    ScanArguments( argc, argv );

    if ( Verbose > 9 ) {
        int i;
        printf( "Identify:  \"%s\"\n", Identify );
        printf( "ResultIs:  \"%s\"\n", ResultIs );
        printf( "SuffixIs:  \"%s\"\n", SuffixIs );
        printf( "IDX:       \"%s\"\n", IDX );
        printf( "Jobname:   \"%s\"\n", Jobname );
        printf( "MakeIndex: \"%s\"", MakeIndex );
        for ( i = 0; i < MakeIndexArgc; i++ )
            printf( " \"%s\"", MakeIndexArgs[i] );
        printf( "\n" );
    }

    ProcessIDXFile();
    /* the files must be complete on disk before MakeIndex reads them */
    CloseAllIDX();
    CallMakeIndex();

    return retVal;
}