Path: seismo!harvard!talcott!panda!sources-request From: sources-request@panda.UUCP Newsgroups: mod.sources Subject: infer - inference engine Message-ID: <1376@panda.UUCP> Date: 5 Feb 86 23:58:20 GMT Sender: jpn@panda.UUCP Lines: 949 Approved: jpn@panda.UUCP Mod.sources: Volume 3, Issue 104 Submitted by: itm!danny (Daniel S. Cox) The following is a shell archive containing "infer" which is a re-write of George Hageman's routines "rulecomp" and "inference". Enjoy! Daniel S. Cox ihnp4!akgua!itm!danny -------------------- Cut Here -------------------------------------- ############################################################ # # infer.ar # # To extract the files from this shell archive file # simply create a directory for this file # and move the archive file to it, strip off these comments # and enter the command # # sh filename # # Do not use csh # The files will be extracted automatically # ############################################################ echo "Extracting README <-- infer.ar" cat << \===README=== > README infer --- an inference engine written in C, based on George Hageman's programs "rulecomp", and "inference". Unfortunatly, not much is left of the original sources. I began by copying the expert.h file into infer.h, but now, the only things left from the original are some names, and the keywords it recognizes. Now, down to building the thing. First, edit the Makefile and modify the BIN and CFLAGS variables to be compatible with your system. Type "make" and stand well back. To crank it up type "infer file" where file contains some rules. The knowledge base named "animal" is included, and is a copy of the original. Infer understands one option, "-v" for verbose. It will display each line it compiles to stdout, and display each rule it considers as it contorts. It's not espcially useful output, but, boy, is it verbose! As it asks a question, infer understands [YyTt] for TRUE, [NnFf] for FALSE, [Qq] for quit, and [Ww] for why. The why only shows rules proven TRUE, and the rule under consideration. If infer detects circular logic, it will attempt to display each rule in the circle. This should help isolate things pretty well. Error messages are intended to be self-explanatory. It mostly complains about unopenable files, and improperly formed lines. If you find usefulness in this program, drop me a line. I'd be interested in knowing where it's used. If you need to contact me, my UUCP path is: ihnp4!akgua!itm!danny US Snail is: In Touch 796 West Peachtree St. NE Atlanta, GA 30308 ATTN Daniel S. Cox (404) 881-0550 IF I think THENHYP I am Daniel (Call me Danny) S. Cox ===README=== echo "Extracting Makefile <-- infer.ar" cat << \===Makefile=== > Makefile CFLAGS = -O LDFLAGS = -n OBJECTS = infer.o compile.o SOURCES = infer.c compile.c BIN = /u/danny/bin infer: $(OBJECTS) $(CC) $(OBJECTS) $(LDFLAGS) -o infer $(OBJECTS): infer.h clean: rm -f $(OBJECTS) core clobber: clean rm -f infer install: infer cp infer $(BIN)/infer strip $(BIN)/infer lint: lint $(SOURCES) >fluff ===Makefile=== echo "Extracting infer.c <-- infer.ar" cat << \===infer.c=== > infer.c /* infer --- inference engine */ # include # include # include "infer.h" # define TRUTHVAL(E) ((E->type & NOT) ? (E->str->val == TRUE) ? FALSE : TRUE : (E->str->val)) char *progname; RULE_T *Rule[MAXRULE]; int nrules = 0; RULE_T *why[MAXWHY]; int nwhy = 0; STR *SP = 0; int verbose; main (argc, argv) int argc; char **argv; { FILE *fp; progname = argv[0]; while (argv[1][0] == '-') { /* check for options */ if (argv[1][1] == 'v') /* I know only one */ verbose++; else fprintf (stderr, "%s: unknown arg \"%s\"\n", progname, argv[1]); argc--; argv++; } if (argc !=2) { fprintf (stderr, "Usage: %s [-v] file\n", progname); exit (1); } if ((fp = fopen (argv[1], "r")) == NULL) { fprintf (stderr, "%s: can't open %s (%s)\n", progname, argv[1], sys_errlist[errno]); exit (1); } compile (fp); fclose (fp); return (infer ()); } /* infer --- inference engine */ infer () { register int i; int proved; for (i = 0; i < nrules; i++) { /* verify each CON */ if (Rule[i]->con == 0) { fprintf (stderr, "%s: RULE has no THENs:\n", progname); prrule (Rule[i], stderr); exit (1); } if (TRUTHVAL (Rule[i]->con) == TRUE) continue; if (verify (Rule[i]) == TRUE) { register ELEMENT_T *e; for (e = Rule[i]->con; e; e = e->next) { if (e->type & ROUTINE) { if (e->str->val == TRUE) continue; if (run (e) == TRUE) { e->str->val = TRUE; if (e->type & HYP) { printf ("Conclusion\n"); return (0); } } else { e->str->val = FALSE; } } else { e->str->val = TRUE; proved = TRUE; printf ("I infer that: %s\n", e->str->p); if (e->type & HYP) { printf ("Conclusion\n"); return (0); } } } } } if (proved == FALSE) { printf ("I can't prove anything!!!\n"); return (1); } return (0); } /* verify --- verify a CON. May be called recursivly */ verify (rule) register RULE_T *rule; { register ELEMENT_T *e; register int i; pushwhy (rule); if (rule->con->str->val == TRUE) { popwhy (); return (TRUE); } if (verbose) prrule (rule, stdout); if (rule->ant == 0) { fprintf (stderr, "%s: RULE has no IFs:\n", progname); prrule (rule, stderr); popwhy (); return (TRUE); } for (e = rule->ant; e; e = e->next) { /* for each ANT */ for (i = 0; i < nrules; i++) if (e->str == Rule[i]->con->str) /* this ANT is a CON */ break; if (i == nrules) { /* NOT a CON */ if (prove (e) == TRUE) continue; else { popwhy (); return (FALSE); } } else { int anytrue = 0; for (i = 0; i < nrules; i++) { if (e->str == Rule[i]->con->str) { /* match */ int ret; if (Rule[i]->vfy == 1) { fprintf (stderr, "%s: Circular logic in Rules! Rules are:\n", progname); prcirc (); exit (1); } Rule[i]->vfy = 1; ret = verify (Rule[i]); Rule[i]->vfy = 0; if (ret == TRUE) { register ELEMENT_T *f; anytrue++; for (f = Rule[i]->con; f; f = f->next) { if (f->str->val == UNKNOWN) { printf ("I infer that: %s\n", f->str->p); f->str->val = TRUE; } } if (prove (e) == TRUE) break; else { popwhy (); return (FALSE); } } } } if (!anytrue) if (e->type & NOT) continue; else { popwhy (); return (FALSE); } else if (e->type & NOT) { popwhy (); return (FALSE); } else continue; } } /* don't pop the why stack if TRUE */ return (TRUE); } /* prove --- prove this ANT (may be already proven) */ prove (e) ELEMENT_T *e; { if (e->str->val != UNKNOWN) return (TRUTHVAL (e)); if (e->type & ROUTINE) return (run (e)); else return (askval (e)); } /* askval --- get truth from user */ askval (e) register ELEMENT_T *e; { char line[80]; register int value; value = UNKNOWN; for (;;) { printf ("Is the following statement true?\n"); printf ("%s : ", e->str->p); if (fgets (line, sizeof (line), stdin) == NULL) *line = 'q'; if (isupper (*line)) *line = tolower (*line); switch (*line) { case 'y': case 't': value = TRUE; break; case 'n': case 'f': value = FALSE; break; case 'q': printf ("OK, Bye!\n"); exit (0); break; case 'w': showwhy (); continue; } if (value != UNKNOWN) break; printf ("How about typeing t/f/y/n?\n"); } e->str->val = value; return (TRUTHVAL (e)); } /* run --- execute this string */ run (e) register ELEMENT_T *e; { register int value, ret; if ((ret = system (e->str->p)) < 0) { printf ("%s: can't execute %s (%s) returning TRUE\n", progname, e->str->p, sys_errlist[errno]); value = TRUE; } else if (ret == 0) value = TRUE; else value = FALSE; e->str->val = value; return (TRUTHVAL (e)); } /* prrule --- print this rule in a readable form */ prrule (rule, fp) register RULE_T *rule; FILE *fp; { register ELEMENT_T *e; char *prval (), *prtype (); for (e = rule->ant; e; e = e->next) fprintf (fp, "%s %s (%s)\n", prtype (e->type, "IF"), e->str->p, prval (e)); for (e = rule->con; e; e = e->next) fprintf (fp, "%s %s (%s)\n", prtype (e->type, "THEN"), e->str->p, prval (e)); fprintf (fp, "\n"); } /* prval --- return the char representation of this value */ char * prval (e) register ELEMENT_T *e; { if (e->str->val == UNKNOWN) return ("UNKNOWN"); else if (e->str->val == TRUE || e->str->val == FALSE) { if (TRUTHVAL (e) == TRUE) return ("TRUE"); else return ("FALSE"); } else return ("BADVAL"); } /* prtype --- return the char representation of this type */ char * prtype (type, word) register short type; register char *word; { static char str_type[20]; strcpy (str_type, word); if (type & NOT) strcat (str_type, "NOT"); if (type & ROUTINE) strcat (str_type, "RUN"); if (type & HYP) strcat (str_type, "HYP"); return (str_type); } /* pushwhy --- push this rule onto the "why" stack */ pushwhy (r) register RULE_T *r; { if (nwhy >= MAXWHY) { fprintf (stderr, "%s: blew why stack!\n", progname); exit (1); } why[nwhy++] = r; } /* popwhy --- pop a value off of the "why" stack */ popwhy () { if (--nwhy < 0) { fprintf (stderr, "%s: why stack underflow!\n", progname); exit (1); } } /* showwhy --- print the details of the "why" stack */ showwhy () { register int i; for (i = 0; i < nwhy; i++) prrule (why[i], stdout); } /* prcirc --- print the details of the circular loop */ prcirc () { register int i; for (i = 0; i < nrules; i++) if (Rule[i]->vfy == 1) prrule (Rule[i], stderr); } ===infer.c=== echo "Extracting compile.c <-- infer.ar" cat << \===compile.c=== > compile.c /* compile --- compile the rules into the internal form */ # include # include # include "infer.h" int Curline; int State; compile (fp) register FILE *fp; { register int key; char line[STRSIZE]; STR *savestr (); Curline = 0; State = ANT; rulealloc (); while (fgets (line, sizeof (line), fp)) { Curline++; squeeze (line, '\n'); stripbl (line); if (verbose) printf ("%4d %s\n", Curline, line); key = getkey (line); if (key == NONE) fprintf (stderr, "%s: no keyword found on line %d\n", progname, Curline); else if (key == COMMENT) ; else push (key, savestr (line)); } } /* newstate --- if switching from CON to ANT, start a new rule */ newstate (new) register int new; { if (new != ANT && new != CON) { /* paranoia */ fprintf (stderr, "%s: bad new val: %d\n", new); exit (1); } if (State != new) { if (State == CON) rulealloc (); State = new; } } /* push --- add an element to this rule */ push (type, ptr) register int type; register STR *ptr; { register ELEMENT_T *e, *last; if (ptr == 0) /* keyword only, ignore */ return; e = (ELEMENT_T *) emalloc ((unsigned) sizeof (ELEMENT_T)); e->str = ptr; e->type = type; e->next = 0; if (State == CON) { if (Rule[nrules-1]->con == 0) /* first element */ Rule[nrules-1]->con = e; else /* place on end */ for (last = Rule[nrules-1]->con; last; last = last->next) if (last->next == 0) { last->next = e; break; } } else { if (Rule[nrules-1]->ant == 0) Rule[nrules-1]->ant = e; else for (last = Rule[nrules-1]->ant; last; last = last->next) if (last->next == 0) { last->next = e; break; } } } /* savestr --- skip 1st word, save rest of line in str buffer */ STR * savestr (line) char *line; { register char *s; register STR *sp; for (s = line; *s; s++) /* skip to 1st space */ if (*s == ' ') break; while (isspace (*s)) /* skip to 1st non-space */ s++; if (*s == 0) { fprintf (stderr, "%s: line %d has nothing but a keyword\n", progname, Curline); return (0); } for (sp = SP; sp; sp = sp->next) /* is string already present? */ if (strcmp (sp->p, s) == 0) return (sp); sp = (STR *) emalloc ((unsigned) sizeof (STR)); /* new string */ sp->p = emalloc ((unsigned) strlen (s)+1); strcpy (sp->p, s); sp->val = UNKNOWN; sp->next = SP; /* place at head */ SP = sp; return (sp); } /* getkey --- determine the keyword on this line */ getkey (line) char *line; { register int i; register char *s, *p; char word[20]; static struct { char *name; short type; short newstate; } keytab[] = { /* these are sorted based on frequency */ "THEN", STRING, CON, /* of use in 3 files I had access to */ "AND", STRING, ANT, /* change at will */ "IF", STRING, ANT, "ANDRUN", ROUTINE, ANT, "ANDNOT", STRING|NOT, ANT, "THENHYP", STRING|HYP, CON, "IFRUN", ROUTINE, ANT, "ANDIF", STRING, ANT, "IFNOT", STRING|NOT, ANT, "THENRUNHYP", ROUTINE|HYP, CON, "THENRUN", ROUTINE, CON, "ANDIFRUN", ROUTINE, ANT, "ANDNOTRUN", ROUTINE|NOT, ANT, "IFNOTRUN", ROUTINE|NOT, ANT, "ANDTHEN", STRING, CON, "ANDTHENHYP", STRING|HYP, CON, "ANDTHENRUN", ROUTINE, CON, "ANDTHENRUNHYP", ROUTINE|HYP, CON, 0, 0, 0 }; if (line[0] == COMMENT_CHAR) return (COMMENT); for (p = word, s = line; *s; s++) /* copy chars into word */ if (isupper (*s)) *p++ = *s; else break; *p = 0; for (i = 0; keytab[i].name; i++) /* look for match */ if (strcmp (word, keytab[i].name) == 0) { newstate (keytab[i].newstate); return (keytab[i].type); } return (NONE); } /* squeeze --- squeeze all c from s */ squeeze (s, c) register char *s; register char c; { register char *p; for (p = s; *s; s++) if (*s != c) *p++ = *s; *p = '\0'; } /* stripbl --- remove trailing blanks from s */ stripbl (s) register char *s; { register char *p; for (p = s; *s; s++) if (*s != ' ') p = s+1; *p = 0; } /* rulealloc --- allocate a new rule, and advance the pointer */ rulealloc () { if (nrules >= MAXRULE) { fprintf (stderr, "%s: too many rules!\n", progname); exit (1); } Rule[nrules] = (RULE_T *) emalloc ((unsigned) sizeof (RULE_T)); Rule[nrules]->ant = 0; Rule[nrules]->con = 0; Rule[nrules]->vfy = 0; nrules++; } /* emalloc --- call malloc and check return */ char * emalloc (n) unsigned n; { char *p, *malloc (); if ((p = malloc (n)) == NULL) { fprintf (stderr, "%s: Out of memory!\n", progname); exit (1); } return (p); } ===compile.c=== echo "Extracting animal <-- infer.ar" cat << \===animal=== > animal ! THIS IS THE KNOWLEDGE BASE FOR THE ! ANIMAL CLASIFICATION EXPERT ! IF ANIMAL HAS FEATHERS ANDIF ANIMAL LAYS EGGS THEN ANIMAL IS BIRD ! IFNOT ANIMAL IS BIRD THEN ANIMAL IS MAMMAL ! IF ANIMAL IS MAMMAL AND ANIMAL EATS MEAT THEN ANIMAL IS CARNIVORE ! IF ANIMAL IS CARNIVORE AND ANIMAL HAS POINTED TEETH AND ANIMAL HAS RETRACTABLE CLAWS AND ANIMAL HAS FORWARD POINTING EYES THEN ANIMAL IS CAT ! IF ANIMAL IS MAMMAL ANDNOT ANIMAL IS CARNIVORE AND ANIMAL HAS HOOFS THEN ANIMAL IS UNGULATE ! IF ANIMAL IS CAT AND ANIMAL HAS TAWNY COLOR AND ANIMAL HAS DARK SPOTS THENHYP ANIMAL IS CHEETA ! IF ANIMAL IS CAT AND ANIMAL HAS TAWNY COLOR AND ANIMAL HAS BLACK STRIPES THENHYP ANIMAL IS TIGER ! IF ANIMAL IS CAT AND ANIMAL IS SMALL AND ANIMAL IS DOMESTICATED AND ANIMAL MOSTLY CATCHES MICE AND ANIMAL LOVES WARM LAPS THENHYP ANIMAL IS A HOUSE CAT ! IF ANIMAL IS UNGULATE AND ANIMAL HAS LONG NECK AND ANIMAL HAS LONG LEGS AND ANIMAL HAS DARK SPOTS THENHYP ANIMAL IS GIRAFFE ! IF ANIMAL IS BIRD ANDNOT ANIMAL FLIES ANDNOT ANIMAL SWIMS AND ANIMAL HAS LONG NECK AND ANIMAL IS BLACK AND WHITE THENHYP ANIMAL IS OSTRICH ! IF ANIMAL IS UNGULATE AND ANIMAL HAS BLACK STRIPES THENHYP ANIMAL IS ZEBRA ! IF ANIMAL IS UNGULATE AND ANIMAL HAS HORNS THENHYP ANIMAL IS COW ! IF ANIMAL IS BIRD ANDNOT ANIMAL FLIES AND ANIMAL SWIMS AND ANIMAL IS BLACK AND WHITE THENHYP ANIMAL IS PENGUIN ! IF ANIMAL IS BIRD AND ANIMAL FLIES AND ANIMAL FLIES WELL AND ANIMAL HAS WEBBED FEET ANDNOT ANIMAL HAS FLAT BILL THENHYP ANIMAL IS ALBATROS ! IF ANIMAL IS BIRD AND ANIMAL FLIES AND ANIMAL FLIES WELL AND ANIMAL HAS WEBBED FEET AND ANIMAL HAS FLAT BILL THENHYP ANIMAL IS DUCK ! ! ! IFNOT ANIMAL IS CHEETA IFNOT ANIMAL IS TIGER IFNOT ANIMAL IS A HOUSE CAT IFNOT ANIMAL IS GIRAFFE IFNOT ANIMAL IS OSTRICH IFNOT ANIMAL IS ZEBRA IFNOT ANIMAL IS PENGUIN IFNOT ANIMAL IS ALBATROS IFNOT ANIMAL IS DUCK IFNOT ANIMAL IS COW THENHYP THIS ANIMAL IS NOT WITHIN MY KNOWLEDGE ===animal=== echo "Extracting rewrite <-- infer.ar" cat << \===rewrite=== > rewrite Not long ago, George Hageman posted an inference engine written in C to the net. Infer is a re-write of "rulecomp" and "inference". My goals in the rewrite were: remove restrictions, centralize I/O, simplify, enhance, and (if I may coin a word) Unixfy. REMOVING RESTRICTIONS: Well, the best way to do that was to malloc everything. The concept of "rule" has changed from value-string pair to a group of ANTECEDENTS and a group of CONSEQUENTS. Thus, with the original 500 rules, there could be a max of 500 ANTs and CONs with 1 rule used to seperate each. It is roughly one rule per line, plus overhead. With the new, 1000 rules is 1000 statement groups. Beyond that, there are no limitations on the number of strings. The maximum string size is 512 - length of keyword - 1 for the space. The other restriction removed is the need for a seperate compiler, and the intermediate file that it produces. I realize that one goal of Mr. Hageman was to keep the two modules seperate, and therefore smaller, but I'd rather keep the two together and eliminate the extra file. This also simplifies the expert system which wishes to build it's own rule-set, and run the engine on it. CENTRALIZING I/O: In both originals, input and output were spread among all source files. To effect a change, one had to discover all locations that performed the operation. For example, to make "rulecomp" quiet, I had to change lines in three different source files. SIMPLIFYING: Where there were two programs, now there is one. There are no more stacks (except for the crude "WHY" interface). I/O all happens in one place for each operation (not true for output). There was a large amount of code duplication in the original that I bundled into a function or two, and the six different types, along with the 18 keyword representations, have become four flags which may be ORed together. ENHANCING: There is now a rather crude "Why" processor, and the code can detect and display circular logic rules. The strings to be run by the system are executed via system(3). Although this requires the overhead of a shell, the advantages are great. One can use environment variables and redirection, for example. UNIXFING: The concept of quiet and verbose has already been mentioned, and the optional argument to make it verbose has been added. Blank lines are not present in the output, and upper/lower case is used extensivly. Error returns from system and library calls are checked, and apropriate messages are displayed. The exit value from a program is more in line with tradition: 0 = TRUE, !0 = FALSE. Thus, one can call, for example, grep(1) and properly determine the truth-value. As far as size is concerned, infer is smaller than inference, realizing of course that infer mallocs the world, while inference has static data already declared. I've been unable to measure any noticable difference in the speed of the programs, both of which have user time < 1 second. Of course I'd like to say mine is faster, but that would be fibbing. I had read somewhere that only a language traditionally suited for AI was able to handle things like "inference". Thanks, Mr. Hageman, for dissolving that myth. I've gained some valuable knowledge from this rewrite. Thanks, again, Mr. Hageman, for sharing your code with me. I now place this program in the public domain, with no restrictions as to its use. If bugs are found (I'm sure some will be) let me know, so I can incorporate them into my "official" version. Use this program freely, but at your own risk. One should also not sell this program for profit; it was freely given, therefore, freely give. Daniel S. Cox ihnp4!akgua!itm!danny ===rewrite=== echo "Extracting infer.h <-- infer.ar" cat << \===infer.h=== > infer.h typedef struct STR { /* Keeper of the Strings */ char *p; /* pointer to string */ short val; /* UNKNOWN, TRUE, FALSE */ struct STR *next; /* yet-another-linked-list */ } STR; typedef struct ELM { /* Antecedent or Consequence */ STR *str; /* Associated string */ short type; /* STRING or ROUTINE, NOT, HYP */ struct ELM *next; /* kept in linked-list */ } ELEMENT_T; typedef struct { /* Rule */ ELEMENT_T *ant; /* Head of antecedents for this rule */ ELEMENT_T *con; /* Head of consequences for this rule */ short vfy; /* to detect circular logic */ } RULE_T; /* ** General Manifest Constants */ # define ANT 1 /* in IF part of rule */ # define CON 2 /* in THEN part of rule */ # define COMMENT_CHAR '!' /* ignore lines beginning with this */ # define MAXRULE 1000 /* plenty */ # define MAXWHY 100 /* things proven true, plus current */ # define STRSIZE 512 /* should be plenty */ /* ** Type definitions */ # define STRING 000 /* display this string */ # define ROUTINE 001 /* run this string via the shell */ # define NOT 002 /* invert truth-value of assoc. string */ # define HYP 004 /* if TRUE, exit */ # define COMMENT 010 /* this line is a comment */ # define NONE 020 /* no keyword recognized */ /* ** Defines for element vals */ # define UNKNOWN 42 /* the answer to Life, the Universe, etc */ # define TRUE (-1) /* all binary ones (most places) */ # define FALSE 0 /* all zeros (most places) */ /* ** External stuff */ extern char *progname; extern int errno; extern char *sys_errlist[]; extern int verbose; extern STR *SP; extern RULE_T *Rule[]; extern int nrules; extern char *emalloc (), *strcpy (), *strcat (); extern void exit (); ===infer.h=== exit