-h- lzdcm1.c Sat Mar 26 16:57:08 1988 USER1:[MINOW.PERSONAL.SOURCE.LZ]LZDCM1.C;63 /* * lzdcmp [-options] [infile [outfile]] */ #ifdef DOCUMENTATION title lzdcmp File Decompression index File decompression synopsis .s.nf lzdcmp [-options] [infile [outfile]] .s.f description lzdcmp decompresses files compressed by lzcomp. The documentation for lzcomp describes the process in greater detail. Options may be given in either case. .lm +8 .p -8 -B Output file is "binary", not text. (Ignored in VMS private mode.) On VMS, this generates a stream file without carriage-control attributes. On Decus C systems, it generates a "fixed block, 512 byte record" file. It is unneeded on Unix. .p -8 -F Output file is "fixed block, 512 bytes." This is used only by VMS for reading files created by Unix compress or by "export" mode. It is identical to binary on Decus C. .p -8 -X 3 To read files compressed by an old Unix version that doesn't generate header records. .p -8 -V val Verbose (print status messages and debugging information). The value selects the amount of verbosity. See the lzcomp documentation for details. VMS Command Language Interface In addition to the above (Unix-style) command line interface, lzdcmp supports a VMS command line interface. The following options are available: .lm +8 .p -8 /EXPORT=(VMS, UNIX, HEADER) .p -8 /MODE=(TEXT, BINARY, FIXED) .p -8 /SHOW=(ALL, PROGRESS, STATISTICS, FDL, DEBUG, DEBUG_SERIOUS, DEBUG_IO) .lm -8 VMS private mode If the file was compressed in VMS private mode, all information needed to reconstruct the file is stored in the compressed file, using the VMS run-time library FDL routines. This means that the expanded file will have the same name and directory location it had originally. If the directory structure does not exist (for example, because you have moved the compressed file to another machine), you must supply the second argument to lzdcmp to specify the file name. Author This version by Martin Minow. See lzcomp for more details. 
#endif

/*
 * Compatible with compress.c, v3.0 84/11/27
 */

/*)BUILD
	$(PROGRAM) = lzdcmp
	$(INCLUDE) = lz.h
	$(CPP) = 1
	$(RMS) = 1
	$(FILES) = { lzdcm1.c lzdcm2.c lzdcm3.c lzio.c lzvio.c }
*/

#include "lz.h"

/*
 * These global parameters are read from the compressed file.
 * The decompressor needs them.
 *
 * maxbits is also settable with -M (see setup()); get_magic_header()
 * overwrites it from the header when one is read and always derives
 * maxmaxcode from it.
 */
short maxbits = BITS;			/* settable max # bits/code	*/
code_int maxmaxcode = 1 << BITS;

/*
 * export holds the EXPORT_* flags describing the compressed file.
 * The value below is only the default: setup() replaces it according
 * to the -X option (values 0 through 4), and, whenever EXPORT_HEADER
 * is set, get_magic_header() takes the VMS/UNIX choice and the BLOCK
 * bit from the file header instead.  Without EXPORT_HEADER no header
 * is read.
 */
#if VMS_V4
flag export = EXPORT_VMS		/* Assume VMS private		*/
	| EXPORT_HEADER | EXPORT_BLOCK | EXPORT_ENDMARKER;
#else
flag export = EXPORT_UNIX		/* Assume standard Unix		*/
	| EXPORT_HEADER | EXPORT_BLOCK | EXPORT_ENDMARKER;
#endif
flag method = METHOD_LZ;		/* Of course			*/
flag mode = MODE_TEXT;			/* MODE_* flags: -B/-F are or'ed */
					/* in by setup(), MODE_DELTA by	*/
					/* get_magic_header()		*/
flag show = 0;				/* No statistics		*/
flag background = FALSE;		/* TRUE (Unix) if detached	*/
flag is_compress = FALSE;		/* For lzio.c and lzdcl.c	*/
char *infilename = NULL;		/* For error printouts		*/
char *outfilename = NULL;		/* For openoutput and errors	*/
int firstcode;				/* First code after internals	*/
static long start_time;			/* Time we started (in msec)	*/
extern long cputime();			/* Returns process time in msec	*/
/*
 * setjmp() context established in main().
 * NOTE(review): FAIL() is defined outside this file; presumably it
 * longjmp()s here with its second argument -- confirm in lz.h.
 */
jmp_buf failure;
STREAM instream;			/* Set up by openinput()	*/
STREAM outstream;			/* Set up by openoutput()	*/
char_type inbuffer[MAXIO];		/* Buffer behind instream	*/
char_type outbuffer[MAXIO];		/* Buffer behind outstream	*/
#if VMS_V4
FDLSTUFF *fdl_input;			/* From fdl_open() in openinput	*/
FDLSTUFF *fdl_output;			/* From fdl_create()		*/
static struct dsc$descriptor fdl_descriptor; /* FDL text read from the	*/
					/* file by get_magic_header()	*/
int xab_lrl = 0;			/* Filled in via more_attributes */
static STREAM mem_stream;		/* Used by mem_decompress()	*/
/*
 * One entry of the table that maps an attribute name found in the
 * compressed file onto the variable that receives its value.
 */
typedef struct more_attributes {
	char *name;			/* Prefix the record starts with */
	int type;			/* STRING or INTEGER		*/
	char *result;			/* Actually a union (int *, char *) */
} MORE_ATTRIBUTES;
#define STRING 0
#define INTEGER 1
/*
 * Additional attributes (put into the compressed file by lzcomp)
 * are extracted using this extensible table.
*/ static MORE_ATTRIBUTES more_attributes[] = { { "xabfhc$w_lrl", INTEGER, &xab_lrl }, NULL }; #endif main(argc, argv) int argc; char *argv[]; /* * Decompress mainline */ { int result; int status; extern code_int getcode(); #ifndef decus /* * background is TRUE if running detached from the command terminal. */ background = (signal(SIGINT, SIG_IGN) == SIG_IGN) ? TRUE : FALSE; if (!background) background = !isatty(fileno(stderr)); if (!background) { if ((show & (SHOW_DEBUG | SHOW_SERIOUS_DEBUG | SHOW_IO_DEBUG)) != 0) signal(SIGINT, abort); else { signal(SIGINT, interrupt); signal(SIGSEGV, address_error); } } #endif if ((status = setjmp(failure)) == 0) { setup(argc, argv); #if DEBUG if (show & SHOW_DEBUG) dumpoptions(); #endif do { openinput(); } while (get_magic_header()); /* Sets export, etc. */ openoutput(); if ((show & SHOW_STATISTICS) != 0) start_time = cputime(); init_decompress(); result = decompress(&outstream); if ((export & EXPORT_ENDMARKER) != 0 && result != LZ_ETX && getcode() != (code_int) LZ_ETX) { fprintf(stderr, "Decompress didn't finish correctly.\n"); goto fail; } lz_flush(&outstream); #if VMS_V4 if ((export & EXPORT_VMS) != 0) fdl_close(fdl_output); else { fclose(stdout); } #else fclose(stdout); #endif #if DEBUG if ((show & SHOW_SERIOUS_DEBUG) != 0) dump_tab(stderr); #endif if ((show & SHOW_STATISTICS) != 0) { start_time = cputime() - start_time; fprintf(stderr, "%ld.%02ld seconds (process time) for decompression.\n", start_time / 1000L, (start_time % 1000L) / 10L); } exit(EXIT_SUCCESS); } else { fail: fprintf(stderr, "Error when decompressing \"%s\" to \"%s\"\n", (infilename == NULL) ? "" : infilename, (outfilename == NULL) ? "" : outfilename); exit(status); } } static int get_magic_header() /* * Read the compressed file header. Note: if we switch from * Unix to VMS (or vice-versa), we must re-open the file in * the new mode. 
*/ { int head1; int head2; int head3; #if VMS_V4 int old_export = export & (EXPORT_VMS | EXPORT_UNIX); #endif head2 = 0; if ((export & EXPORT_HEADER) != 0) { if ((head1 = GET(&instream)) != HEAD1_MAGIC) { fprintf(stderr, "Incorrect first header byte 0x%x\n", head1); FAIL("can't get header", EXIT_FAILURE); } head2 = GET(&instream); head3 = GET(&instream); switch (head2) { case HEAD2_MAGIC: export &= ~(EXPORT_VMS | EXPORT_BLOCK); export |= EXPORT_UNIX; mode &= ~MODE_DELTA; break; case VMS_HEAD2_MAGIC: export &= ~(EXPORT_UNIX | EXPORT_BLOCK); export |= EXPORT_VMS; mode &= ~MODE_DELTA; break; default: fprintf(stderr, "Incorrect second header byte 0x%x\n", head2); FAIL("can't get header", EXIT_FAILURE); } #if VMS_V4 /* * If export status changes, we must re-open the input file. */ if ((export & (EXPORT_VMS | EXPORT_UNIX)) != old_export) return (TRUE); #endif maxbits = head3 & BIT_MASK; if ((head3 & BLOCK_MASK) != 0) export |= EXPORT_BLOCK; if ((head3 & DIFF_MASK) != 0) mode |= MODE_DELTA; #if DEBUG if ((show & SHOW_DEBUG) != 0) { fprintf(stderr, "%s: compressed with %d bits,", infilename, maxbits); fprintf(stderr, " block compression %s.\n", (export & EXPORT_BLOCK) ? "enabled" : "disabled"); fprintf(stderr, " differential compression %s.\n", (mode & MODE_DELTA) ? "enabled" : "disabled"); } #endif } if (maxbits > BITS) { fprintf(stderr, "%s: compressed with %d bits,", infilename, maxbits); fprintf(stderr, " lzdcmp can only handle %d bits\n", BITS); FAIL("too many bits", EXIT_FAILURE); } maxmaxcode = 1 << maxbits; if ((export & EXPORT_VMS) != 0) firstcode = GET(&instream) + 0x100; /* From compressed file */ else if ((export & EXPORT_BLOCK) != 0) firstcode = LZ_CLEAR + 1; /* Default */ else firstcode = 256; /* Backwards compatible */ #if VMS_V4 if ((export & EXPORT_VMS) != 0) { register code_int code; char text[256]; extern code_int getcode(); auto int termin; /* * Get the attribute record. 
*/ if ((code = getcode()) != LZ_SOH) { fprintf(stderr, "Expected header, read 0x%x\n", code); FAIL("can't get header (private)", EXIT_FAILURE); } init_decompress(); code = mem_decompress(text, sizeof text, &termin); text[code] = EOS; if (strncmp(text, ATT_NAME, ATT_SIZE) != 0) { fprintf(stderr, "Expected \"%s\", read \"%.*s\"\n", ATT_NAME, code, text); FAIL("can't get attribute block header", EXIT_FAILURE); } code = atoi(text + ATT_SIZE); fdl_descriptor.dsc$b_class = DSC$K_CLASS_S; fdl_descriptor.dsc$b_dtype = DSC$K_DTYPE_T; /* * Note: malloc should probably be lib$getvm */ fdl_descriptor.dsc$a_pointer = malloc(code); fdl_descriptor.dsc$w_length = code; code = mem_decompress(fdl_descriptor.dsc$a_pointer, code, &termin); if (code != fdl_descriptor.dsc$w_length) { fprintf(stderr, "\nError reading fdl attributes block,"); fprintf(stderr, " expected %d bytes, read %d bytes\n", fdl_descriptor.dsc$w_length, code); FAIL("can't get attribute block data", EXIT_FAILURE); } if ((show & SHOW_FDL) != 0) { fprintf(stderr, "\nFDL information read from \"%s\"\n", infilename); fdl_dump(&fdl_descriptor, stderr); } while (termin == LZ_EOR) { MORE_ATTRIBUTES *p; int len; code = mem_decompress(text, sizeof text - 1, &termin); text[code] = EOS; if ((show & SHOW_FDL) != 0) fprintf(stderr, "Additional header: \"%s\"\n", text); for (p = more_attributes;; p++) { if (p->name == NULL) { fprintf(stderr, "\nIgnoring unknown descriptor \"%s\"\n", text); break; } len = strlen(p->name); if (strncmp(p->name, text, len) == 0) { switch (p->type) { case INTEGER: *((int *)p->result) = atoi(&text[len]); break; case STRING: strcpy(p->result, &text[len]); break; } break; } } } if ((code = getcode()) != LZ_STX) { fprintf(stderr, "\nExpecting start of text, got 0x%x\n", code); FAIL("no start of text", EXIT_FAILURE); } } #else if ((export & EXPORT_VMS) != 0) FAIL("VMS private mode not supported", EXIT_FAILURE); #endif return (FALSE); } #if VMS_V4 int mem_decompress(buffer, size, termin) char_type *buffer; 
int size; int *termin; /* * Decompress up to size bytes to buffer. Return actual size. */ { mem_stream.bp = mem_stream.bstart = buffer; mem_stream.bend = buffer + size; mem_stream.bsize = size; mem_stream.func = lz_fail; if ((*termin = decompress(&mem_stream)) == LZ_EOR || *termin == LZ_ETX) return (mem_stream.bp - buffer); else { fprintf(stderr, "Decompress to memory failed.\n"); FAIL("can't decompress to memory", EXIT_FAILURE); } return (-1); /* Can't happen */ } #endif static readonly char *helptext[] = { "The following options are valid:", "-B\tBinary file (important on VMS/RSX, ignored on Unix)", "-F\tOutput fixed-block 512 byte records (VMS export only", "-M val\tSet the maximum number of code bits (unless header present)", "-V val\tPrint status information or debugging data.", "-X val\tSet export (compatibility) mode:", "-X 0\tVMS private mode", "-X 1\tCompatibility with Unix compress", "-X 2\tDo not read a header, disable \"block-compress\" mode", "\t(If a header is present, lzdcmp will properly configure itself,", "\toverriding the -X, -B and -M flag values.", NULL, }; static setup(argc, argv) int argc; char *argv[]; /* * Get parameters and open files. Exit fatally on errors. */ { register char *ap; register int c; char **hp; auto int i; int j; int temp; #ifdef vms argc = getredirection(argc, argv); /* * Prescan to see whether we must do a DCL parse. */ for (j = FALSE, i = 1; i < argc; i++) { if (argv[i][0] == '-') { j = TRUE; break; } } if (j == FALSE) { if ((i = lzdcl(argc, argv)) != SS$_NORMAL) exit(i); return; } #endif for (i = j = 1; i < argc; i++) { ap = argv[i]; if (*ap++ != '-' || *ap == EOS) /* Filename? 
*/ argv[j++] = argv[i]; /* Just copy it */ else { while ((c = *ap++) != EOS) { if (islower(c)) c = toupper(c); switch (c) { case 'B': mode |= MODE_BINARY; break; case 'F': mode |= MODE_FIXED; break; case 'M': maxbits = getvalue(ap, &i, argv); if (maxbits < MIN_BITS) { fprintf(stderr, "Illegal -M value\n"); goto usage; } break; case 'V': show = getvalue(ap, &i, argv); break; case 'X': switch ((temp = getvalue(ap, &i, argv))) { case 0: export = EXPORT_VMS | EXPORT_BLOCK | EXPORT_HEADER | EXPORT_ENDMARKER; break; case 1: export = EXPORT_UNIX | EXPORT_BLOCK | EXPORT_HEADER | EXPORT_ENDMARKER; break; case 2: export = EXPORT_UNIX | EXPORT_BLOCK | EXPORT_HEADER; break; case 3: export = EXPORT_UNIX | EXPORT_HEADER; break; case 4: export = EXPORT_UNIX; break; default: fprintf(stderr, "Illegal -X value: %d\n", temp); goto usage; } break; default: fprintf(stderr, "Unknown option '%c' in \"%s\"\n", *ap, argv[i]); usage: for (hp = helptext; *hp != NULL; hp++) fprintf(stderr, "%s\n", *hp); FAIL("unknown option", EXIT_FAILURE); } /* Switch on options */ } /* Everything for -xxx */ } /* If -option */ } /* For all argc's */ /* infilename = NULL; */ /* Set "stdin" signal */ /* outfilename = NULL; */ /* Set "stdout" signal */ switch (j) { /* Any file arguments? */ case 3: /* both files given */ if (!streq(argv[2], "-")) /* But - means stdout */ outfilename = argv[2]; case 2: /* Input file given */ if (!streq(argv[1], "-")) infilename = argv[1]; break; case 0: /* None! */ case 1: /* No file arguments */ break; default: fprintf(stderr, "Too many file arguments\n"); FAIL("too many files", EXIT_FAILURE); } } static int getvalue(ap, ip, argv) register char *ap; int *ip; char *argv[]; /* * Compile a "value". We are currently scanning *ap, part of argv[*ip]. * The following are possible: * -x123 return (123) and set *ap to EOS so the caller * ap^ cycles to the next argument. * * -x 123 *ap == EOS and argv[*ip + 1][0] is a digit. 
* return (123) and increment *i to skip over the * next argument. * * -xy or -x y return(1), don't touch *ap or *ip. * * Note that the default for "flag option without value" is 1. This * can only cause a problem for the -M option where the value is * mandatory. However, the result of 1 is illegal as it is less * than INIT_BITS. */ { register int result; register int i; i = *ip + 1; if (isdigit(*ap)) { result = atoi(ap); *ap = EOS; } else if (*ap == EOS && argv[i] != NULL && isdigit(argv[i][0])) { result = atoi(argv[i]); *ip = i; } else { result = 1; } return (result); } openinput() { #ifdef decus if (infilename == NULL) { infilename = malloc(257); fgetname(stdin, infilename); infilename = realloc(infilename, strlen(infilename) + 1); } if (freopen(infilename, "rn", stdin) == NULL) { perror(infilename); FAIL("can't open compressed input", ERROR_EXIT); } #else #if VMS_V4 if ((export & EXPORT_VMS) != 0) { if (infilename == NULL) { infilename = malloc(256 + 1); fgetname(stdin, infilename); infilename = realloc(infilename, strlen(infilename) + 1); } if ((fdl_input = fdl_open(infilename, NULL)) == NULL) FAIL("can't open compressed input (vms private)", fdl_status); } else #endif { if (infilename == NULL) { #ifdef vms infilename = malloc(256 + 1); fgetname(stdin, infilename); infilename = realloc(infilename, strlen(infilename) + 1); #else infilename = ""; #endif } else { /* * "rb" means "read, binary mode" */ if (freopen(infilename, "rb", stdin) == NULL) { perror(infilename); FAIL("can't open compressed input (export)", ERROR_EXIT); } } } #endif instream.bp = instream.bend = NULL; instream.bstart = inbuffer; instream.bsize = sizeof inbuffer; instream.func = lz_fill; } openoutput() { #ifdef vms #if VMS_V4 if ((export & EXPORT_VMS) != 0) { extern FDLSTUFF *fdl_create(); fclose(stdout); stdout = NULL; fdl_output = fdl_create(&fdl_descriptor, outfilename, xab_lrl); if (fdl_output == NULL) { fprintf(stderr, "Can't create output file\n"); FAIL("can't create output (vms 
private)", fdl_status); } if (outfilename == NULL) { outfilename = malloc(256 + 1); fdl_getname(fdl_output, outfilename); outfilename = realloc(outfilename, strlen(outfilename) + 1); } } else #endif { /* * Not VMS Version 4, or export mode. */ if (outfilename == NULL) { outfilename = malloc(256 + 1); fgetname(stdout, outfilename); outfilename = realloc(outfilename, strlen(outfilename) + 1); if ((mode & (MODE_BINARY | MODE_FIXED)) == 0) goto do_reopen; } else { if ((mode & MODE_BINARY) != 0) { if (freopen(outfilename, "wb", stdout, "alq=256", "fop=tef") == NULL) { perror(outfilename); FAIL("can't create output (binary)", ERROR_EXIT); } } else if ((mode & MODE_FIXED) != 0) { if (freopen(outfilename, "wb", stdout, "alq=256", "fop=tef", "rfm=fix", "bls=512") == NULL) { perror(outfilename); FAIL("can't create output (fixed)", ERROR_EXIT); } } else { /* * Try to allocate the output file in chunks. */ do_reopen: if (freopen(outfilename, "w", stdout, "alq=256", "fop=tef", "rat=cr", "rfm=var") == NULL) { perror(outfilename); FAIL("can't create output (text)", ERROR_EXIT); } } } } #else #ifdef decus if (outfilename == NULL) { outfilename = malloc(256 + 1); fgetname(stdout, outfilename); outfilename = realloc(outfilename, strlen(outfilename) + 1); } if (freopen(outfilename, ((mode & (MODE_BINARY | MODE_FIXED)) != 0) ?"wn" : "w", stdout) == NULL) { perror(outfilename); FAIL("can't reopen", ERROR_EXIT); } #else if (outfilename == NULL) outfilename = ""; else { if (freopen(outfilename, "w", stdout) == NULL) { perror(outfilename); FAIL("can't create", ERROR_EXIT); } } #endif #endif outstream.bp = outstream.bstart = outbuffer; outstream.bend = outbuffer + sizeof outbuffer; outstream.bsize = sizeof outbuffer; outstream.func = lz_flush; } -h- lzdcm2.c Sat Mar 26 16:57:08 1988 USER1:[MINOW.PERSONAL.SOURCE.LZ]LZDCM2.C;17 /* * l z d c m 2 . c * * Actual decompression code */ #include "lz.h" /* * These global parameters are read from the compressed file. * The decompressor needs them. 
*/ extern short maxbits; /* settable max # bits/code */ extern code_int maxmaxcode; /* 1 << maxbits */ static flag first_clear = TRUE; /* * Big data storage stuff */ static char_type stack[MAXSTACK]; #define STACK_TOP (&stack[MAXSTACK]) static U_short tab_prefix[1 << BITS]; /* prefix code */ static char_type tab_suffix[1 << BITS]; /* last char in string */ code_int next_code; #if DEBUG #define CHECK(what) \ if (stp <= stack) { \ fprintf(stderr, "Stack overflow -- %s\n", what); \ abort(); \ } #else #define CHECK(what) #endif int decompress(out) STREAM *out; /* * Decompress instream (global) to out. Returns "end" signal: * -1 end of file * LZ_EOR end of record * LZ_ETX end of segment */ { register char_type *stp; /* Stack pointer */ register code_int code, oldcode, incode; register int final; /* End of a sequence? */ register char_type *pstp; /* Stack pointer and */ register int previous; /* char for diff flavor */ extern code_int getcode(); stp = STACK_TOP; final = oldcode = getcode(); PUT((char) final, out); previous = final; while ((code = getcode()) >= 0) { test: if (code >= LZ_CLEAR && code < firstcode) { if ((mode & MODE_DELTA) != 0) { for (pstp = stp; pstp < STACK_TOP;) { #if UCHAR previous += *pstp; #else previous += (*pstp & 0xFF); #endif *pstp++ = previous; } } lz_putbuf(stp, STACK_TOP - stp, out); stp = STACK_TOP; switch (code) { case LZ_ETX: case LZ_EOR: goto finish; case LZ_SOH: /* Unexpected */ case LZ_STX: /* Unexpected */ default: fprintf(stderr, "\nUnexpected control 0x%X\n", code); FAIL("Unexpected control", EXIT_FAILURE); case LZ_CLEAR: init_decompress(); /* Before getcode() !! */ if ((code = getcode()) < 0 || ((export & (EXPORT_UNIX | EXPORT_ENDMARKER)) == (EXPORT_UNIX | EXPORT_ENDMARKER) && code == LZ_CLEAR)) goto finish; else { /* * init_decompress has set next_code to firstcode, * however, for magical reasons, we want to toss * the next substring, so we set next_code so * that the tab_... entry is effectively ignored. 
* Note below that tab_prefix[next_code] is set * to the character before the LZ_CLEAR and * tab_suffix to the character after the LZ_CLEAR. * But, these values have no relationship to one * another, so, by decrementing next_code, they * will be ignored. (I think.) */ next_code--; goto test; } } } incode = code; /* * Special case for KwKwK string. */ if (code >= next_code) { CHECK("KwKwK"); *--stp = final; code = oldcode; } /* * Generate output characters in reverse order */ #ifdef interdata while (((unsigned long) code) >= ((unsigned long) NBR_CHAR)) { #else while (code >= NBR_CHAR) { #endif CHECK("generate output"); *--stp = tab_suffix[code]; code = tab_prefix[code]; } CHECK("final char"); *--stp = final = tab_suffix[code]; /* * And put them out in forward order */ if ((mode & MODE_DELTA) != 0) { for (pstp = stp; pstp < STACK_TOP;) { #if UCHAR previous += *pstp; #else previous += (*pstp & 0xFF); #endif *pstp++ = previous; } } lz_putbuf(stp, STACK_TOP - stp, out); stp = STACK_TOP; /* * Generate the new entry. */ if ((code = next_code) < maxmaxcode) { tab_prefix[code] = (U_short) oldcode; tab_suffix[code] = final; next_code++; } /* * Remember previous code. */ oldcode = incode; } finish: return (code); } init_decompress() /* * Called on cold start, or on LZ_SOH, LZ_STX, LZ_CLEAR. 
*/ { register char_type *cp; register U_short *up; register int code; if (first_clear) { for (cp = &tab_suffix[0], code = 0; cp < &tab_suffix[NBR_CHAR];) *cp++ = code++; first_clear = FALSE; } else { #if ((NBR_CHAR % 8) != 0) << error, the following won't work >> #endif for (up = &tab_prefix[0]; up < &tab_prefix[NBR_CHAR];) { *up++ = 0; *up++ = 0; *up++ = 0; *up++ = 0; *up++ = 0; *up++ = 0; *up++ = 0; *up++ = 0; } } next_code = firstcode; } #if DEBUG dump_tab(dumpfd) FILE *dumpfd; /* * dump string table */ { register char_type *stp; /* Stack pointer */ register int i; register int ent; extern char *dumpchar(); stp = STACK_TOP; fprintf(dumpfd, "%d %s in string table\n", next_code, (next_code == 1) ? "entry" : "entries"); for (i = 0; i < next_code; i++) { fprintf(dumpfd, "%5d: %5d/'%s' ", i, tab_prefix[i], dumpchar(tab_suffix[i])); for (ent = i;;) { *--stp = tab_suffix[ent]; if (ent < firstcode) break; ent = tab_prefix[ent]; } dumptext(stp, STACK_TOP - stp, dumpfd); stp = STACK_TOP; } } #endif -h- lzdcm3.c Sat Mar 26 16:57:08 1988 USER1:[MINOW.PERSONAL.SOURCE.LZ]LZDCM3.C;22 /* * l z d c m 3 . c * * Read codes from the input stream. */ #include "lz.h" #if !vax_asm && !vms_asm static readonly char_type rmask[9] = { 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF }; #endif #if DEBUG extern int col; static int todump; unsigned long code_count; unsigned long dump_first = -1; unsigned long dump_last = -1; #endif /* * getcode() * * Read one code from the standard input. If EOF, return -1. * Inputs: * stdin (via GET) * Outputs: * code or -1 is returned. */ extern code_int next_code; extern STREAM instream; extern code_int maxmaxcode; extern short maxbits; static short n_bits = INIT_BITS; static code_int maxcode = MAXCODE(INIT_BITS); /* * buf[] contains 8-bit data read from the input stream. getcode() * treats buf[] as a vector of bits, repacking it into variable-bit codes. 
*/
static char_type buf[BITS];
static int offset = 0;			/* Offset into buf IN BITS	*/
static int size = 0;			/* Actual size of buf IN BITS	*/

/*
 * Return the next code from instream, or -1 at end of file.
 *
 * The code width n_bits starts at INIT_BITS and is increased by one
 * whenever next_code has grown past maxcode.  After LZ_SOH, LZ_STX
 * or LZ_CLEAR has been read, the width drops back to INIT_BITS and
 * the rest of buf[] is discarded.  buf[] is refilled through
 * lz_getbuf() with n_bits bytes at a time.
 */
code_int getcode()
{
	/*
	 * On the VAX (4.2 bsd), it is important to have the register
	 * declarations in exactly the order given, or the asm will break.
	 */
	register code_int code;		/* R11 extracted code		*/
	register int r_off;		/* R10 offset			*/
	register int bits;		/* R09 bits/code		*/
	register char_type *bp;		/* R08 buffer pointer		*/

	bp = buf;
	/*
	 * The table has outgrown the current code width: widen it.
	 * Setting size to 0 forces the refill below, so what is left
	 * of the old buffer is dropped.
	 */
	if (next_code > maxcode) {
		n_bits++;
		if (n_bits == maxbits)
			maxcode = maxmaxcode;
		else {
			maxcode = MAXCODE(n_bits);
		}
		size = 0;
#if DEBUG
		if ((show & SHOW_DEBUG) != 0) {
			fprintf(stderr, "\nChange to %d bits", n_bits);
			col = 74;
		}
#endif
	}
	/*
	 * Buffer used up (or discarded): read the next group of bytes.
	 */
	if (offset >= size) {
		size = lz_getbuf(buf, n_bits, &instream);
#if DEBUG
		if ((show & SHOW_SERIOUS_DEBUG) != 0 || todump > 0 || (code_count >= dump_first && code_count <= dump_last)) {
			fprintf(stderr, "code_count %lu\n", code_count);
			dumphex(buf, size, stderr);
			if (todump > 0)
				todump -= size;
		}
#endif
		if (size <= 0)
			return (-1);	/* end of file			*/
		offset = 0;
		/*
		 * Round size down to integral number of codes in the buffer.
		 * (Expressed as a number of bits).
		 */
		size = (size << 3) - (n_bits - 1);
	}
	r_off = offset;
	bits = n_bits;
	/*
	 * Extract "bits" bits starting at bit "r_off" of buf[].  The
	 * two asm variants use a bit-field extract; the portable code
	 * assembles the code from up to three bytes, low-order part
	 * first.
	 */
#if vax_asm
	asm("extzv r10,r9,(r8),r11");
#else
#if vms_asm
	code = lib$extzv(&offset, &n_bits, bp);
#else
	/*
	 * Get to the first byte.
	 */
	bp += (r_off >> 3);
	r_off &= 7;
	/*
	 * Get first part (low order bits)
	 */
#if UCHAR
	code = (*bp++ >> r_off);
#else
	/*
	 * Don't touch the 0xFF; it prevents sign extension.
	 */
	code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xFF;
#endif
	bits -= (8 - r_off);
	r_off = 8 - r_off;		/* now, offset into code word	*/
	/*
	 * Get any 8 bit parts in the middle (<=1 for up to 16 bits).
	 */
	if (bits >= 8) {
#if UCHAR
		code |= *bp++ << r_off;
#else
		code |= (*bp++ & 0xFF) << r_off;
#endif
		r_off += 8;
		bits -= 8;
	}
	/*
	 * high order bits.
	 * (Both arms of the conditional below are the same statement.)
	 */
#if UCHAR
	code |= (*bp & rmask[bits]) << r_off;
#else
	code |= (*bp & rmask[bits]) << r_off;
#endif
	/*
	 * End of non-vax (Unix) specific code.
	 */
#endif
#endif
	offset += n_bits;
	/*
	 * These control codes restart the code width; LZ_EOR and
	 * LZ_ETX are passed through without any reset.
	 */
	if (code >= LZ_CLEAR && code < firstcode) {
		switch (code) {
		case LZ_SOH:
		case LZ_STX:
		case LZ_CLEAR:
			size = 0;	/* Force read next time		*/
			n_bits = INIT_BITS;
			maxcode = MAXCODE(INIT_BITS);
#if DEBUG
			if ((show & SHOW_DEBUG) != 0) {
				fprintf(stderr, "Read %s (%d)\n", lz_names[code - LZ_CLEAR], code);
				todump = 32;
			}
#endif
			break;
		}
	}
#if DEBUG
	if ((show & SHOW_SERIOUS_DEBUG) != 0 || (code_count >= dump_first && code_count <= dump_last)) {
		fprintf(stderr, "%c%5d %5d", ((col += 12) >= 72) ? (col = 0, '\n') : ' ', code, next_code);
		if (code >= LZ_CLEAR && code < firstcode) {
			fprintf(stderr, " = %s", lz_names[code - LZ_CLEAR]);
			col = 74;
		}
	}
	++code_count;
#endif
	return (code);
}
-h- lz.hlp Sat Mar 26 16:57:08 1988 USER1:[MINOW.PERSONAL.SOURCE.LZ]LZ.HLP;6 ! ! compress ! 1 COMPRESS The COMPRESS command invokes a utility to copy a file, generating a file with (usually) fewer bytes. Files compressed by COMPRESS are recovered by DECOMPRESS. COMPRESS Input-file-spec Output-file-spec 2 Parameter Input-file-spec Specifies the name of the file to be compressed. If you have specified /EXPORT=VMS mode, the file must be stored on a disk. Output-file-spec Specifies the name of the file created by COMPRESS. 2 Qualifiers Indicate special actions to be performed by the COMPRESS utility or special properties of either the input or output files. Qualifiers apply to the entire process. The following list shows all the qualifiers available with the COMPRESS command: o /BITS=value o /EXPORT=(option,...) o /METHOD=option o /MODE=(option,...) o /SHOW=(option,...) 2 /BITS /BITS=value This specifies the maximum number of bits to be used in the compression. 
It implicitly controls both the "quality" of the compression (more bits means more compression) and the amount of memory needed for both compression and decompression (more bits requires more memory). If the compressed file is to be read by a computer with limited memory (such as a PDP-11), choose /BITS=12, else leave BITS at its default of 16. The minimum value is 9 and the maximum value is 16. 2 /EXPORT /EXPORT=(option, [,...]) Export controls the format of the output file. You can select the following: VMS (D) Write a file that can only be read by VMS COMPRESS. UNIX Write a format that can be read by programs compatible with the Unix compress utility. [NO]ENDMARKER Write a special file endmarker after the data if specified. [NO]BLOCK Monitor compression and reinitialize if the quality decreases if specified. [NO]HEADER Write a file header with information for DECOMPRESS if specified. In general, use /EXPORT=VMS for compression where the result will be decompressed on a VMS system and /EXPORT=UNIX where the result will be decompressed on a Unix, RSX-11M, RSTS/E, or other non-VMS system. If /EXPORT=UNIX is specified, BLOCK, HEADER, or ENDMARKER may be negated to further qualify the output file format. 3 VMS Specifies output in VMS ("private") mode. In addition to the contents of the file itself, the "File definition block" is also compressed. The decompression utility can thus recreate the file exactly (including ISAM indexes). This is the default, and the recommended mode for most uses. 3 UNIX Specifies an output format compatible with Unix compress v3.0. This allows transmitting sequential files to non-VMS systems that support a compress-compatible utility. If you have specified /EXPORT=UNIX, the utility can be configured for variants of Unix compress by negating BLOCK, HEADER, and/or ENDMARKER as needed. Notice that file attributes are not preserved by /EXPORT=UNIX. 
3 BLOCK Selects an algorithm whereby COMPRESS evaluates its performance and re-initializes the compression tables whenever performance degrades. Older versions of Unix compress do not support this capability. If negated, ENDMARKER must also be negated. 3 HEADER If negated, COMPRESS does not write a header record. This is for compatibility with very old versions of Unix compress. If negated, BLOCK and ENDMARKER must also be negated. 3 ENDMARKER If specified, a special "endmark" is written after the end of the file. This is necessary if the file is to be decompressed on RT11 or other systems that require a file's data to fill its last block completely. On the other hand, some versions of Unix compress cannot understand the "extra" endmarker. If you guess wrong, a few bytes of garbage may be appended to the decompressed file. A version of Unix compress that handles endmarkers correctly is available. 2 /METHOD /METHOD=(option) This specifies the particular compression algorithm. Currently, only /METHOD=LZW is supported. 3 LZW Use the Lempel-Ziv-Welch compression algorithm. 2 /MODE /MODE=(option) This allows specification of variations on the compression method. 3 BINARY This opens the file in "binary" mode, rather than "text" mode. It is ignored if /EXPORT=VMS is chosen. 3 DELTA Compress the difference between successive bytes, rather than the bytes themselves. For certain file formats, such as bit-mapped graphics, this may yield a 10-15% improvement in compressibility. This is not compatible with some implementations of Unix compress. 2 /SHOW /SHOW=(option, [,...]) Display information about the compression. If omitted, COMPRESS operates silently (except for error messages). 3 ALL Equivalent to /SHOW=(PROGRESS,STATISTICS,FDL) 3 PROGRESS Print status messages at intervals, showing the operation of the program. The report shows the current compression ratio (the ratio of input to output bytes). 
If this decreases, COMPRESS decides that the characteristics of the file have changed, and resets its internal parameters. The "gap" is the number of input codes used to compute the ratio. 3 STATISTICS Print a report at the end of the process. Note that COMPRESS reports the number of bytes it compresses, which includes the file as well as the information that COMPRESS records about the file (the File Definition Language block and some internal codes), and will therefore be several hundred characters greater than the actual size of the file. 3 FDL Dump the File Definition Language block that describes a VMS input file. 3 DEBUG Print internal debugging information. 3 DEBUG_SERIOUS Print more internal debugging information. 3 DEBUG_IO Dump the output file, too. 2 LZW_Overview LZW stands for a compression method described in "A technique for High Performance Data Compression." Terry A. Welch. IEEE Computer, Vol 17, No. 6 (June 1984) pp. 8-19. This section is abstracted from Terry Welch's article referenced above. The algorithm builds a string translation table that maps substrings in the input into fixed-length codes. The compress algorithm may be described as follows: 1. Initialize table to contain single-character strings. 2. Read the first character. Set w (the prefix string) to that character. 3. (step): Read next input character, K. 4. If at end of file, output code(w); exit. 5. If wK is in the string table: Set w to wK; goto step 3. 6. Else wK is not in the string table. Output code(w); Put wK into the string table; Set w to K; Goto step 3. "At each execution of the basic step an acceptable input string w has been parsed off. The next character K is read and the extended string wK is tested to see if it exists in the string table. If it is there, then the extended string becomes the parsed string w and the step is repeated. 
If wK is not in the string table, then it is entered, the code for the successfully parsed string w is put out as compressed data, the character K becomes the beginning of the next string, and the step is repeated." The decompression algorithm translates each received code into a prefix string and extension [suffix] character. The extension character is stored (in a push-down stack), and the prefix translated again, until the prefix is a single character, which completes decompression of this code. The entire code is then output by popping the stack. I.e., the last character pushed onto the stack is the first character of the decoded string. "An update to the string table is made for each code received (except the first one). When a code has been translated, its final character is used as the extension character, combined with the prior string, to add a new string to the string table. This new string is assigned a unique code value, which is the same code that the compressor assigned to that string. In this way, the decompressor incrementally reconstructs the same string table that the compressor used.... Unfortunately ... [the algorithm] does not work for an abnormal case. The abnormal case occurs whenever an input character string contains the sequence KwKwK, where Kw already appears in the compressor string table." The decompression algorithm, augmented to handle the abnormal case, is as follows: 1. Read first input code; Store in CODE and OLDcode; With CODE = code(K), output(K); FINchar = K; 2. Read next code to CODE; INcode = CODE; If at end of file, exit; 3. If CODE not in string table (special case) then Output(FINchar); CODE = OLDcode; INcode = code(OLDcode, FINchar); 4. If CODE == code(wK) then Push K onto the stack; CODE = code(w); Goto 4. 5. If CODE == code(K) then Output K; FINchar = K; 6. While stack not empty Output top of stack; Pop stack; 7. Put OLDcode,K into the string table. OLDcode = INcode; Goto 2. 
The algorithm as implemented here introduces two additional complications. The actual codes are transmitted using a variable-length encoding. The lowest-level routines increase the number of bits in the code when the largest possible code is transmitted. Periodically, the algorithm checks that compression is still increasing. If the ratio of input bytes to output bytes decreases, the entire process is reset. This can happen if the characteristics of the input file change. (This can be suppressed by /EXPORT=(UNIX, NOBLOCK)). 2 Unix Is a trademark of AT&T Bell Laboratories. ! ! decompress ! 1 DECOMPRESS The DECOMPRESS command invokes a utility to restore a copy of a file that had been compressed by COMPRESS. DECOMPRESS Input-file-spec [Output-file-spec] 2 Command_Parameters Input-file-spec Specifies the name of the compressed input file. Output-file-spec Specifies the name of the file created by DECOMPRESS. If the file was compressed by /EXPORT=VMS, the original file name will be used if the Output file spec. is omitted. If the file was compressed by /EXPORT=UNIX and no Output file spec. is provided, the file will be written to SYS$OUTPUT: 2 Command_Qualifiers Indicate special actions to be performed by the DECOMPRESS utility or special properties of either the input or output files. Qualifiers apply to the entire process. The following list shows all the qualifiers available with the DECOMPRESS command: o /BITS=value o /EXPORT=(option,...) o /METHOD=option o /MODE=(option,...) o /SHOW=(option,...) 2 /BITS /BITS=value If a header was not provided, this specifies the maximum number of bits that were used in the compression. This parameter is ignored if the compressed file contains a header. 2 /EXPORT /EXPORT=(option [,...]) Export describes the format of the input file. You can select the following: VMS (D) The file was created by VMS COMPRESS. UNIX The file was created by Unix compress or a compatible program. 
[NO]ENDMARKER A special file endmarker follows the data. [NO]BLOCK The compress program may have reinitialized compression. [NO]HEADER The compress program wrote its parameters into a file header. In general, the program can determine the proper value of these flags by reading the first few bytes of the file. If valid, the file header overrides the command line specification. Generally, this option is needed only if you are trying to read a file generated by a version of Unix compress that did not write a header. See the description of COMPRESS for details. 2 /METHOD /METHOD=(option) This specifies the particular compression algorithm. Currently, only /METHOD=LZW is supported. 3 LZW Use the Lempel-Ziv-Welch compression algorithm. 2 /MODE /MODE=(option) This allows specification of variations on the output file format. These values will be taken from the source file description if /EXPORT=VMS is chosen. 3 BINARY This creates the file in "binary" mode, rather than "text" mode. It is ignored if COMPRESS created the file in /EXPORT=VMS mode. The output file will be created in RMS "Stream-LF" format. 3 DELTA COMPRESS used the difference between successive bytes, rather than the bytes themselves. For certain file formats, such as bit-mapped graphics, this may yield a 10-15% improvement in compressibility. This is not compatible with some implementations of Unix compress. This value is normally read from the file header, and generally need not be specified by DECOMPRESS. 3 FIXED Create the file in "fixed-block, 512-byte record" format. This is probably the best format to use for decompressing binary files (such as tar archives) created on Unix. 3 TEXT Create the file in "variable-length carriage-control" format. This is appropriate for decompressing readable text files created by Unix compress. 2 /SHOW /SHOW=(option [,...]) Display information about the decompression. If omitted, DECOMPRESS operates silently (except for error messages). 
3 ALL Equivalent to /SHOW=(PROGRESS,STATISTICS,FDL) 3 PROGRESS Print status messages at intervals, showing the operation of the program. 3 STATISTICS Print a report at the end of the process. 3 FDL Dump the File Definition Language block that describes the output file. 3 DEBUG Print internal debugging information. 3 DEBUG_SERIOUS Print more internal debugging information. 3 DEBUG_IO Dump the input file, too. 2 Unix Is a trademark of AT&T Bell Laboratories.