/*
 * Awp2txt.c v1.0 - Andy McFadden, June 1998
 * This program is in the public domain.
 *
 * Convert an AppleWorks Word Processor file (ProDOS file type $1A) to
 * text.  Converts a single file given as an argument, or converts from
 * stdin if the file is "-".  Output is sent to stdout.
 *
 * This is a quick converter intended to produce readable output similar
 * to how it appears *within AppleWorks*.  I don't try to fill text, so
 * this won't match the printer output.  You will also see page header
 * lines within the text.
 *
 * Compile this with something like "gcc -O -s awp2txt.c -o awp2txt".
 * Tested under Solaris 2.5 and SunOS 4.1.1, with some simple AppleWorks 3.0
 * documents.
 *
 * (1 tab == 4 spaces)
 */
#include <assert.h>
#include <errno.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
 * ===========================================================================
 *		Defines
 * ===========================================================================
 */

/* this would normally be in a header, but I want to distribute as one file */

/*
 * AppleWorks file format, from FTN.1A.xxxx.
 *
 * The file format is:
 *
 *	file header
 *	array of line records
 *	$ff $ff
 *	optional tags
 */

/* one raw byte of file data */
typedef unsigned char uchar;
/* single-byte truth value; holds either "false" or "true" */
typedef unsigned char Boolean;
#define false	0
#define true	1


/*
 * File header, mapped directly on top of the input.  This structure must
 * be exactly 300 bytes (kFileHeaderSize); ConvertAWPToText asserts this
 * before reading the header into it with a single fread.
 *
 * Every member is a byte or an array of bytes, and the number on each line
 * is the member's decimal offset within the file.  Entries marked (3.0)
 * were introduced with AppleWorks 3.0.
 */
typedef struct FileHeader {
	uchar		unused1[4];		/* 000 - 003: not used */
	uchar		seventyNine;	/* 004      : $4f (79) */
	uchar		tabStops[80];	/* 005 - 084: tab stops, one of "=<^>.|" */
	uchar		zoomFlag;		/* 085      : boolean Zoom flag */
	uchar		unused2[4];		/* 086 - 089: not used */
	uchar		paginatedFlag;	/* 090      : boolean Paginated flag */
	uchar		minLeftMargin;	/* 091      : minimum "unseen" left margin */
	uchar		mailMergeFlag;	/* 092      : boolean - file has merge cmds */
	uchar		unused3[83];	/* 093 - 175: not used, reserved */
	uchar		multiRulerFlag;	/* 176      : (3.0) boolean Multiple Rulers */
	uchar		tabRulers[6];	/* 177 - 182: (3.0) used internally */
	uchar		sfMinVers;		/* 183      : (3.0) min version of AW req */
	uchar		unused4[66];	/* 184 - 249: reserved */
	uchar		unused5[50];	/* 250 - 299: available */
} FileHeader;

/* file header checks */
enum {
	kFileHeaderSize		= 300,	/* expected size of FileHeader */
	kSeventyNine		= 79,	/* value of FileHeader.seventyNine */
	kSFMinVers30		= 30	/* indicates AW 3.0 */
};

/* values found inside text records and at the end of the record list */
enum {
	kTabFlagsIsRuler	= 0xff,	/* compare against byte 2 of text record */
	kCRatEOL			= 0x80,	/* flag on byte 3 of the text record */
	kMinTextChar		= 0x20,	/* values from 0x01 - 0x1f are special */
	kEOFMarker			= 0xff	/* two of these found at end of file */
};

/* line record codes (second byte of each line record) */
enum {
	kLineRecordText				= 0x00,
	kLineRecordCarriageReturn	= 0xd0,
	kLineRecordCommandMin		= 0xd4,
	kLineRecordCommandMax		= 0xff
};

/* special characters embedded in text */
enum {
	kSpecialCharTabFill	= 0x17	/* tab fill char, not vis in doc */
};

/* limits */
enum {
	kMaxSoftFailures	= 8		/* give up if it looks like junk */
};


/*
 * Current state of the document.  One instance is set up by InitDocState
 * and then passed to the record handlers for the whole conversion.
 */
typedef struct DocState {
	/* number of unrecognized line record codes seen so far */
	long		softFailures;
	/* starts at 1; advanced after each text record, quoted in warnings */
	long		line;

	/* not using these yet */
	Boolean		bold;
	Boolean		underline;
	int			leftMargin;
} DocState;


/*
 * ===========================================================================
 *		Meaty bits
 * ===========================================================================
 */

/*
 * Put the DocState structure into its starting condition: every counter
 * and attribute cleared, with the line number set to 1.
 *
 * The file header is accepted so that fields from it can seed the state
 * later on; nothing is read from it at present.
 */
static void
InitDocState(DocState* pDocState, const FileHeader* pFileHeader)
{
	assert(pDocState != NULL);
	assert(pFileHeader != NULL);

	pDocState->softFailures = 0;
	pDocState->line = 1;

	pDocState->bold = false;
	pDocState->underline = false;
	pDocState->leftMargin = 0;
}


/*
 * Handle a text record.  The first two bytes are flags (the tab flags,
 * then the text byte count combined with the "CR at end of line" bit),
 * the rest is either the text or a ruler.  Special codes may be embedded
 * in the text.
 *
 * "lineRecData" has the number of bytes of input that we have yet to read
 * for this record, including the two flag bytes.
 *
 * Ruler records are consumed without printing their contents.  Bytes below
 * kMinTextChar are printed as '^', except for the tab fill character, which
 * is printed as a space.  A newline is written and the line counter is
 * advanced after every record that was read completely.
 *
 * Returns zero on success, nonzero if the input ends or fails before the
 * whole record has been read.
 */
static int
HandleTextRecord(DocState* pDocState, uchar lineRecData, FILE* infp, FILE* outfp)
{
	int err = 0;
	int tabFlags;
	int byteCountPlusCR;
	int byteCount = lineRecData;
	Boolean noOutput = false;
	int ic;

	assert(pDocState != NULL);
	assert(infp != NULL);
	assert(outfp != NULL);

	/*
	 * Keep the flag bytes in ints so that EOF can be told apart from a
	 * legitimate 0xff byte.
	 */
	tabFlags = getc(infp);
	byteCountPlusCR = getc(infp);
	if (tabFlags == EOF || byteCountPlusCR == EOF) {
		fprintf(stderr, "ERROR: unexpected EOF hit\n");
		/* errno is only meaningful if the stream reported an error */
		err = (ferror(infp) && errno) ? errno : -1;
		goto bail;
	}
	byteCount -= 2;

	if (byteCount <= 0) {
		fprintf(stderr, "WARNING: line %ld: short line (%d)\n",
			pDocState->line, byteCount);
		/* this is bad, but keep going anyway */
	}

	if ((byteCountPlusCR & ~kCRatEOL) != byteCount) {
		fprintf(stderr,
			"WARNING: line %ld: byteCount now %d, offset 3 count %d\n",
			pDocState->line, byteCount, byteCountPlusCR & ~kCRatEOL);
		/* not sure why this would legally happen */
	}

	if (tabFlags == kTabFlagsIsRuler)
		noOutput = true;

	/*
	 * A record shorter than its two flag bytes leaves byteCount negative.
	 * Test for "> 0" so that such a record reads no text at all, instead
	 * of counting down through the negative numbers and consuming the
	 * rest of the file.
	 */
	while (byteCount-- > 0) {
		ic = getc(infp);
		if (ic == EOF) {
			fprintf(stderr, "ERROR: unexpected EOF hit\n");
			err = -1;
			goto bail;
		}

		if (noOutput)
			continue;

		if (ic < kMinTextChar) {
			/* just skip special chars for now */
			if (ic == kSpecialCharTabFill)
				putc(' ', outfp);
			else
				putc('^', outfp);
			continue;
		}

		putc(ic, outfp);
	}

	/* ignore the (byteCountPlusCR & kCRatEOL); it's redundant for us */

	/* always put a return at end of *our* lines */
	putc('\n', outfp);

	/* another line processed, advance the line counter */
	pDocState->line++;

bail:
	return err;
}

/*
 * Process a line record, dispatching on its code byte.
 *
 * Text records are handed to HandleTextRecord, carriage return records
 * produce a newline, and command records are skipped.  Any other code is
 * reported and counted as a soft failure.
 *
 * Returns zero to keep going, nonzero if the text record handler failed
 * or more than kMaxSoftFailures unrecognized codes have been seen.
 */
static int
ProcessLineRecord(DocState* pDocState, uchar lineRecData, uchar lineRecCode,
	FILE* infp, FILE* outfp)
{
	if (lineRecCode == kLineRecordText)
		return HandleTextRecord(pDocState, lineRecData, infp, outfp);

	if (lineRecCode == kLineRecordCarriageReturn) {
		/* the data byte is a horizontal offset; not used for now */
		putc('\n', outfp);
		return 0;
	}

	if (lineRecCode >= kLineRecordCommandMin &&
		lineRecCode <= kLineRecordCommandMax)
	{
		/* formatting commands are not interpreted yet */
		return 0;
	}

	/* none of the above: bad command */
	fprintf(stderr, "WARNING: unrecognized code 0x%02x\n", lineRecCode);
	pDocState->softFailures++;
	if (pDocState->softFailures <= kMaxSoftFailures)
		return 0;

	fprintf(stderr, "ERROR: too many failures, giving up\n");
	return -1;
}


/*
 * Convert the input AWP file to a text file.
 *
 * Returns zero on success, nonzero on failure.
 */
int
ConvertAWPToText(FILE* infp, FILE* outfp)
{
	int err = 0;
	FileHeader fileHeader;
	DocState docState;
	Boolean skipRecord;
	uchar lineRecCode, lineRecData;

	assert(infp != NULL);
	assert(outfp != NULL);
	assert(sizeof(fileHeader) == kFileHeaderSize);

	/*
	 * Read the file header.
	 */
	if (fread(&fileHeader, sizeof(fileHeader), 1, infp) != 1) {
		err = errno ? errno : -1;
		perror("read failed");
		goto bail;
	}

	/* do some quick sanity checks */
	if (fileHeader.seventyNine != kSeventyNine) {
		fprintf(stderr, "ERROR: expected %d in signature byte, found %d\n",
			kSeventyNine, fileHeader.seventyNine);
		err = -1;
		goto bail;
	}
	if (fileHeader.sfMinVers && fileHeader.sfMinVers != kSFMinVers30) {
		fprintf(stderr, "WARNING: unexpected value %d for sfMinVers\n",
			fileHeader.sfMinVers);
		/* keep going */
	}

	InitDocState(&docState, &fileHeader);

	skipRecord = false;
	if (fileHeader.sfMinVers == kSFMinVers30) {
		/* first line record is bad; ignore it */
		skipRecord = true;
	}

	/*
	 * Read the line records.
	 */
	while (1) {
		lineRecData = getc(infp);
		lineRecCode = getc(infp);

		/* check status of input file */
		if (feof(infp) || ferror(infp)) {
			err = -1;
			goto bail;
		}

		if (skipRecord) {
			skipRecord = 0;
			continue;
		}

		/* end of data reached? */
		if (lineRecData == kEOFMarker && lineRecCode == kEOFMarker)
			break;

		err = ProcessLineRecord(&docState, lineRecData, lineRecCode, infp,
				outfp);
		if (err)
			goto bail;
	}

	putc('\n', outfp);

	/* 
	 * Read the optional tags.
	 */
	/* (nah) */

bail:
	return err;
}


/*
 * Print a one-line usage summary to stderr.  "argv0" is the name the
 * program was invoked with.
 */
static void
Usage(const char* argv0)
{
	fprintf(stderr, "Usage: %s", argv0);
	fputs(" filename  (use '-' for stdin)\n", stderr);
}

/*
 * Simple arg processing.
 *
 * Takes exactly one argument: the name of the file to convert, or "-" to
 * read from stdin.  The converted text is written to stdout.
 *
 * Exit status is 0 on success, 1 if the input could not be opened or the
 * conversion failed, and 2 if the arguments were wrong.
 */
int
main(int argc, char** argv)
{
	FILE* infp = NULL;
	int result;

	if (argc != 2) {
		Usage(argv[0]);
		return 2;
	}

	if (strcmp(argv[1], "-") == 0) {
		/* stdin keeps whatever mode the runtime opened it with */
		infp = stdin;
	} else {
		/*
		 * The input is binary data, so open it in binary mode; text mode
		 * may translate or truncate bytes on some platforms.
		 */
		infp = fopen(argv[1], "rb");
		if (infp == NULL) {
			perror("fopen input");
			return 1;
		}
	}

	result = ConvertAWPToText(infp, stdout);

	if (infp != stdin)
		fclose(infp);
	return result != 0;
}

