/* tty2html.c -- filter nroff output for inclusion in HTML    -*- C -*- */

/* Copyright (c) 2005 Ian Piumarta
 * 
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the 'Software'),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, provided that the above copyright notice(s) and this
 * permission notice appear in all copies of the Software and that both the
 * above copyright notice(s) and this permission notice appear in supporting
 * documentation.
 *
 * THE SOFTWARE IS PROVIDED 'AS IS'.  USE ENTIRELY AT YOUR OWN RISK.
 */

/* Last edited: 2005-11-03 03:38:32 by piumarta on margaux.local
 * 
 * BUGS:
 *   - output lines are silently truncated at 132 characters
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "config.h"


/* Line-assembly state shared by reset(), send(), filter() and ship().
 * buffer[] and attr[] are parallel arrays indexed by output column; a
 * zero byte in buffer[] marks a column that has not been written yet.
 */
char buffer[132];	/* characters of the output line being assembled */
int  attr[132];		/* BOLD/UNDER bits for the character in each column */
int  position=  0;	/* current output column (send() lets it run past the end) */

int  optItalic= 0;	/* 1 => generate italic, 0 => underlined text */
int  optStyle=  0;	/* 1 => <DIV> and <SPAN>, 0 => <B> and <U> */
int  optSpace=  0;	/* 1 => make all spaces non-breakable */
int  optPre=    0;	/* 1 => <PRE>formatted text ... </PRE> */
int  optPage=   0;	/* 1 => generate complete document */

char *title=    0;


/* Attribute bits; any combination may be set for a column in attr[].
 */
enum {
  BOLD=  0x01,		/* character was overstruck with itself */
  UNDER= 0x02		/* character was overstruck with an underscore */
};


/* Start a fresh output line: clear every column's character and
 * attribute bits and rewind to the first column.
 */
void reset(void)
{
  memset(attr,   0, sizeof attr);	/* no text attributes */
  memset(buffer, 0, sizeof buffer);	/* all columns empty */
  position= 0;				/* column zero */
}

/* Write the opening markup for an attribute to stdout.
 *
 * tag -- element name ("b", "u" or "i").  In style-sheet mode (optStyle)
 *        it becomes the class of a <span>; otherwise it is emitted as
 *        the element itself.
 */
void begin(const char *tag)
{
  if (!optStyle)
    {
      printf("<%s>", tag);
      return;
    }
  printf("<span class=\"%s\">", tag);
}

/* Write the closing markup for an attribute to stdout.
 *
 * tag -- element name given to the matching begin().  It is ignored in
 *        style-sheet mode (optStyle), where every attribute is closed
 *        by an anonymous </span>.
 */
void end(const char *tag)
{
  if (!optStyle)
    {
      printf("</%s>", tag);
      return;
    }
  fputs("</span>", stdout);
}

/* Open the markup for every attribute bit set in mask.
 * Bold is opened first, then underline (rendered as italic when
 * optItalic is set).
 */
void enable(int mask)
{
  const char *underTag= optItalic ? "i" : "u";

  if (mask & BOLD)
    begin("b");
  if (mask & UNDER)
    begin(underTag);
}

/* Close the markup for every attribute bit set in mask.
 *
 * enable() opens bold before underline, so when both bits are given
 * the underline (or italic, with optItalic) element is the inner one
 * and must be closed first; closing them in opening order would
 * produce mis-nested HTML such as <b><u>text</b></u>.
 */
void disable(int mask)
{
  if (mask & UNDER) end(optItalic ? "i" : "u");	/* inner element first */
  if (mask & BOLD)  end("b");			/* then the outer one */
}

/* ship the buffer to the output
 */
void ship(void)
{
  int i= 0, decor= 0;			/* decor = active text attributes */

  while (i < position)			/* for each column... */
    {
      int changed= decor ^ attr[i];	/* attributes changed since last column */

      if (changed)
	{
	  disable(changed & decor);	/* turn off attributes that went 1 -> 0 */
	  enable(changed & ~decor);	/* turn on  attributes that went 0 -> 1 */
	  decor ^= changed;		/* toggle changed attributes */
	}

      switch (buffer[i])		/* send the character */
	{
	  /* make spaces non-breakable if required
	   */
	case ' ':
	  if (optSpace)
	    printf("&nbsp;");
	  else
	    putchar(buffer[i]);
	  break;

	  /* convert meta chars to named entities
	   */
	case '"':  printf("&quot;");	break;
	case '&':  printf("&amp;");	break;
	case '<':  printf("&lt;");	break;
	case '>':  printf("&gt;");	break;

	  /* anything else goes out verbatim
	   */
	default:   putchar(buffer[i]);	break;
	}
      ++i;				/* next column */
    }
  /* turn off all attributes at EOL
   */
  disable(decor);
}

/* Send the given character to the output buffer at the current column,
 * dealing with overstrikes, then advance one column.
 *
 * Overstrike rules, applied when the column already holds a character:
 *   - same character again           => mark the column BOLD
 *   - an underscore is already there => mark UNDER, keep the new character
 *   - the new character is '_'       => mark UNDER, keep the old character
 *   - anything else                  => report on stderr, keep the new one
 *
 * Columns beyond the end of the buffer are silently dropped, but the
 * column counter still advances so that backspaces and tab stops keep
 * working.
 */
void send(int c)
{
  if (position < (int)sizeof(buffer))	/* truncate at column 132 */
    {
      char old= buffer[position];	/* 0 => nothing in this column yet */

      if (!old)
	buffer[position]= c;		/* first time at this column */
      else if (c == old)		/* same char: make it bold */
	attr[position] |= BOLD;
      else if ('_' == old)		/* underscore already in column */
	{
	  attr[position] |= UNDER;	/* underline output */
	  buffer[position]= c;		/* retain non-underscore char */
	}
      else if ('_' == c)		/* overprinting with underscore */
	attr[position] |= UNDER;
      else
	{
	  /* %X takes an unsigned int; going through unsigned char also
	   * keeps bytes >= 0x80 from printing as FFFFFFxx where plain
	   * char is signed
	   */
	  fprintf(stderr, "Overstrike %02X %02X?\n",
		  (unsigned)(unsigned char)old, (unsigned)(unsigned char)c);
	  buffer[position]= c;
	}
    }
  ++position;				/* advance to next column */
}

/* Handle the characters that move the output column themselves and
 * pass everything else on to send().
 *
 *   backspace -- step back one column (never to the left of column 0)
 *   tab       -- pad with spaces up to the next multiple of 8 columns
 */
void filter(int c)
{
  if ('\b' == c)
    {
      if (position != 0)
	--position;
    }
  else if ('\t' == c)
    {
      int pad;

      for (pad= 8 - (position & 7);  pad > 0;  --pad)
	send(' ');
    }
  else
    send(c);			/* ordinary character: goes out unmodified */
}

/* Print the package banner and bug-report address on stderr, followed
 * by a summary of the command-line options when help is non-zero, then
 * exit with status 0.  Does not return.
 *
 * program -- name shown in the usage line (main() passes argv[0])
 * help    -- 0 => banner only, non-zero => banner plus option summary
 */
void usage(const char *program, int help)
{
  static const char *const options[]= {
    "  -dtitle -- make a complete HTML document\n",
    "  -i      -- generate italics not underlines\n",
    "  -n      -- generate non-breakable spaces\n",
    "  -p      -- make preformatted output\n",
    "  -s      -- generate div and spans (for style sheets)\n"
  };
  size_t n;

  fprintf(stderr, "%s\n", PACKAGE_NAME" "PACKAGE_VERSION" "PACKAGE_COPYRIGHT);
  fprintf(stderr, "report bugs to: %s\n", PACKAGE_BUGREPORT);
  if (help)
    {
      fprintf(stderr, "\nusage: %s [-dtitle] [-i] [-n] [-p] [-s]\n", program);
      for (n= 0;  n < sizeof(options) / sizeof(options[0]);  ++n)
	fputs(options[n], stderr);
    }
  exit(0);
}

/* Entry point: parse the options, then copy stdin to stdout one line at
 * a time, converting nroff overstrikes to HTML markup.
 *
 * Options: -dtitle (complete document), -i, -n, -p, -s as described by
 * usage(); -v prints the banner only; anything else prints the help.
 *
 * Input is consumed byte by byte rather than through a fixed-size line
 * buffer.  An overstruck character occupies three input bytes but only
 * one output column, so the length of an input line says little about
 * its width; reading fixed-size chunks would break long lines in the
 * middle (possibly in the middle of an overstrike sequence) and insert
 * a spurious line break.  The only limit left is the width of the
 * output buffer, enforced by send().
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
  int argno;
  int c;
  int pending= 0;		/* 1 => input seen since the last newline */

  for (argno= 1;  argno < argc;  ++argno)
    {
      const char *arg= argv[argno];

      if      (!strncmp(arg, "-d", 2))	{ optPage= 1;  title= argv[argno] + 2; }
      else if (!strcmp(arg, "-i"))	optItalic= 1;
      else if (!strcmp(arg, "-n"))	optSpace=  1;
      else if (!strcmp(arg, "-p"))	optPre=    1;
      else if (!strcmp(arg, "-s"))	optStyle=  1;
      else if (!strcmp(arg, "-v"))	usage(argv[0], 0);
      else				usage(argv[0], 1);
    }

  if (optPage)
    {
      puts(  "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
      puts(  "<html lang=\"en\">");
      puts(  "<head>");
      puts(  "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">");
      puts(  "<meta name=\"generator\" content=\"tty2html http://piumarta.com/software\">");
      printf("<title>%s</title>", title);
      if (optStyle)
	puts("<link rel=\"stylesheet\" href=\"man.css\" type=\"text/css\">");
      puts(  "</head>");
      puts(  "<body>");
    }

  if (optStyle) printf("<div class=\"nroff\">");
  if (optPre)   printf("<pre>");

  reset();					/* begin the first output line */
  while ((c= getchar()) != EOF)
    {
      if ('\n' == c)
	{
	  ship();				/* ship the output line to stdout */
	  putchar('\n');			/* newline survives even if the line was truncated */
	  if (!optPre) printf("<br>");		/* linebreak if not preformatted */
	  reset();				/* begin a new output line */
	  pending= 0;
	}
      else if (c)				/* NUL would look like an empty column: drop it */
	{
	  /* getchar() yields 0..255; convert to plain char so that
	   * send() can compare it with what is stored in buffer[]
	   */
	  filter((char)c);
	  pending= 1;
	}
    }

  if (pending)					/* last line had no newline */
    {
      ship();
      if (!optPre) printf("<br>");
    }

  if (optPre)   printf("</pre>\n");
  if (optStyle) printf("</div>\n");

  if (optPage)
    {
      puts("</body>");
      puts("</html>");
    }

  return 0;
}

