.TITLE EDFIND Pattern matching for TED and VMS .ident /1.0.01/ ; 22-Nov-84 10:37:34 Brian Nelson ; ; Rewrite of FINSTR into Macro-32 for speed ; .psect BIGA = ^O101 BIGZ = ^O132 SMALLA = BIGA ! ^O40 SMALLZ = BIGZ ! ^O40 ; Map case insensitive letters to each other .psect mtab_map,PIC,USR,OVR,REL,LCL,SHR,NOEXE,RD,WRT,NOVEC,LONG mtab_map:$$ = 0 ; generate match tables that will .rept BIGA ; map lowercase letters to upper .byte $$ ; case letters. Doing so can save $$ = $$ + 1 ; about 20% of the cpu time spent .endr ; doing a case insensitive match. $$ = BIGA .rept BIGZ-BIGA+1 ; Now map uppercase letters to .byte $$ ; themselves. $$ = $$ + 1 ; simple to do, will do the same .endr ; to lowercase letters minus ^o40 $$ = BIGZ+1 ; start with ord('Z')+1 .rept SMALLA-BIGZ-1 ; the characters between 'Z' and .byte $$ ; 'a' map to themselves. $$ = $$ + 1 ; get these mapped also please .endr ; simple $$ = BIGA ; Now map the lowercase letters .rept SMALLZ-SMALLA+1 ; to the correspnding maps that .byte $$ ; the uppercase letters have. $$ = $$ + 1 ; next please. .endr ; all done $$ = SMALLZ+1 ; map the rest up to 377 .rept ^o377- ; the rest of the character set. .byte $$ ; map them to themselves $$ = $$ + 1 .endr ; all done .psect w_l_address,PIC,USR,OVR,REL,GBL,SHR,NOEXE,RD,WRT,NOVEC,LONG w_l_address: .psect matchflag,PIC,USR,OVR,REL,GBL,SHR,NOEXE,RD,WRT,NOVEC,LONG matchflag: .psect edfind_xl,PIC,USR,OVR,REL,GBL,SHR,NOEXE,RD,WRT,NOVEC,LONG edfind_xl: .blkb 512 .psect edsrc_xl,PIC,USR,OVR,REL,GBL,SHR,NOEXE,RD,WRT,NOVEC,LONG edsrc_xl: .blkb 512 .psect code ,lcl,con,exe,shr,rel,nowrt .sbttl instr fast version using MATCHC ;+ INSTR(src,srclen,pat,patlen) Fast pattern matching using MATCHC ; ; INSTR will check for case insensitive call, and if so will call ; FINSTR to process the request, otherwise it will use the VAX ; MATCHC instruction to accomplish the search quickly (?). ;- .psect code ,lcl,con,exe,shr,rel,nowrt .entry instr ,^M tstl matchflag ; does case matter today? beql 10$ ; no pushl 16(ap) ; regenerate the call list for pushl 12(ap) ; finstr pushl 8(ap) ; ... pushl 4(ap) ; ... calls #4 ,finstr ; and at last, do it. brb 100$ ; exit 10$: movzwl 8(ap) ,r4 ; length of the string to scan in beql 90$ ; nothing there, exit please movzwl 16(ap) ,r5 ; length of the pattern string beql 90$ ; nothing there, exit with failure matchc r5,@12(ap),r4,@4(ap) ; do a quick check with matchc bneq 90$ ; not found subl3 r2,r4 ,r0 ; found it, compute the correct subl2 r5 ,r0 ; position and exit incl r0 ; all done brb 100$ ; exit 90$: clrl r0 ; not found, exit 100$: ret .entry finstr ,^M movl 4(ap) ,r7 ; get the source string address movl 12(ap) ,r8 ; get the pattern address also movl r8 ,r1 ; /1/ save it movzwl 16(ap) ,r6 ; get the pattern string length movl r6 ,r0 ; /1/ save it beql 6$ ; nothing to do so exit then movl 8(ap) ,r5 beql 6$ 5$: bitb #128 ,(r1)+ bnequ 9$ sobgtr r0 ,5$ cmpw r5 ,#512 bgtr 9$ movtc r6,(r8),#0,mtab_map,r6,edfind_xl movtc 8(ap),(r7),#0,mtab_map,8(ap),edsrc_xl locc edfind_xl,8(ap),edsrc_xl beql 6$ matchc r6,edfind_xl,8(ap),edsrc_xl bneq 6$ ; not found subl3 r2,8(ap),r0 ; found it, compute the correct subl2 r6 ,r0 ; position and exit incl r0 ; all done ret 6$: clrl r0 ret 9$: subl3 r6,r5,r10 ; get the number of times to loop clrl r9 ; current position in the string 10$: cmpb (r7) ,(r8) ; do the characters match at all? beql 20$ ; yes jsb matchc ; no, check for wildcards and case tstl r0 ; insensitive matching beql 80$ ; no match, move up one character 20$: movl r8 ,r4 ; save( pattern_pointer ) movl r7 ,r3 ; save( source_pointer ) decl r6 ; Anything left to check in pattern? bgtr 30$ ; Yes incl r9 ; no, return current position+1 brb 100$ ; exit 30$: incl r8 ; more to do, move along in the source incl r7 ; and also in the pattern string. cmpb (r7) ,(r8) ; place this check inline beql 35$ ; if not, then call matchc jsb matchc ; see if the next character matches tstl r0 ; did they match? beql 50$ ; no 35$: sobgtr r6 ,30$ ; yes, check the next one please 40$: incl r9 ; success, exit brb 100$ ; bye 50$: movl r4 ,r8 ; restore the pattern string pointer movl r3 ,r7 ; restore the source string pointer movzwl 16(ap) ,r6 ; restore the pattern string length 80$: incl r7 ; next source string character please incl r9 ; increment current position sobgeq r10 ,10$ ; check next character in source 90$: clrl r9 ; failure, return zero and exit 100$: movl r9 ,r0 ; success, return the position ret .sbttl matchc check for character match, special case ;+ MATCHC(c1,c2) ; ; Matchc will first check for equality without concern for ; case. If this fails it will then check for META characters ; in the pattern character, and if so will try to match the ; various classes. ; This is rewritten from the C version for speed, since pat- ; tern matching is just about the most frequent thing a text ; editor has to do. ; ; Passed: R7 address of source character ; R8 address of pattern character ; ;- Return: R0 zero for failure, else success matchc: movzbl #1 ,r0 ; success flag (assume true) movzbl (r7) ,r1 ; the source string movzbl (r8) ,r2 ; the pattern string cmpb r2 ,#METAMIN ; is this a wildcarded character blssu 10$ ; no jsb meta_match ; yes, try for class matches brb 100$ ; and exit 10$: cmpb L^mtab_map(r1),L^mtab_map(r2) ; do the characters map to the same? beqlu 100$ ; yes, return success then 90$: clrl r0 ; failure exit 100$: rsb ; exit .sbttl meta character matching ;+ Passed: R1 the character to match ; R2 the meta character ; ;- Return: R0 zero for failure, else success ; #define metamin 192 ; #define any 192 ; #define notalfa 193 ; #define let 195 ; #define alfa 196 ; #define digit 197 ; #define letdig 200 ; #define white 201 ; #define smalllet 202 ; #define biglet 203 METAMIN = ^D192 METAMAX = ^D203 LARGEA = ^O101 LARGEZ = ^O132 SMALLA = LARGEA!^O40 SMALLZ = LARGEZ!^O40 SPACE = ^O40 TAB = ^O11 meta_match: casew r2,#METAMIN,#METAMAX-METAMIN 10$: .word case_any - 10$ ; 192 match anything .word case_notalfa - 10$ ; 193 match not in (a..z,A..Z,0..9,.,$) .word case_notimp - 10$ ; 194 obvious by the name .word case_let - 10$ ; 195 match (a..z,A..Z) .word case_alfa - 10$ ; 196 match (a..z,A..Z,0..9,.,$) .word case_digit - 10$ ; 197 match (0..9) .word case_notimp - 10$ ; 198 obvious by the name .word case_notimp - 10$ ; 199 obvious by the name .word case_letdig - 10$ ; 200 match (a..z,A..Z,0..9) .word case_white - 10$ ; 201 match (SPACE,TAB) .word case_smalllet - 10$ ; 202 match (a..z) .word case_biglet - 10$ ; 203 match (A..Z) clrl r0 ; xxx anything else rsb case_notimp: clrl r0 ; return(0) for not implemented rsb ; and exit case_any: movzbl #1 ,r0 ; return(1) rsb ; with result true in r0 case_notalfa: jsb case_alfa ; simply call alfa and return with decl r0 ; convert true to false (1 to zero) rsb ; and zero to -1 case_let: cmpb r1 ,#LARGEA ; check for small or large letter blssu 20$ ; can't be a letter cmpb r1 ,#LARGEZ ; try 'Z' blequ 10$ ; could be lowercase cmpb r1 ,#SMALLA ; try lowercase now blssu 20$ ; no cmpb r1 ,#SMALLZ ; try lowercase z bgtru 20$ ; no 10$: movzbl #1 ,r0 ; success rsb 20$: clrl r0 ; failure rsb case_alfa: cmpb r1 ,#^A/$/ ; allow $ and . please beql 10$ ; got it cmpb r1 ,#^A/./ ; what about this one? beql 10$ ; success jsb case_let ; check for a letter ? tstl r0 ; well? bneq 10$ ; success jsb case_digit ; try for a number brb 100$ ; exit with digit status then 10$: movzbl #1 ,r0 ; return(success) 100$: rsb ; and exit case_digit: cmpb r1 ,#^A/0/ ; check for a digit today blssu 10$ ; no cmpb r1 ,#^A/9/ ; well? bgtru 10$ ; no movzbl #1 ,r0 ; yes, return success rsb 10$: clrl r0 ; failure, return(0) rsb ; and back to matchc case_letdig: jsb case_let ; a letter ? tstl r0 ; well? bneq 10$ ; yes jsb case_digit ; no, what about a digit 10$: rsb case_white: cmpb r1 ,#SPACE ; a space ? beql 10$ ; yes cmpb r1 ,#TAB ; a tab? bneq 20$ ; no 10$: movzbl #1 ,r0 ; return success rsb ; back to matchc 20$: clrl r0 ; return failure rsb ; back to matchc case_smalllet: cmpb r1 ,#SMALLA ; lower case letter? blssu 20$ ; no cmpb r1 ,#SMALLZ ; perhaps, is it in range bgtru 20$ ; no movzbl #1 ,r0 ; yes, return(1) rsb 20$: clrl r0 ; no, return failure rsb case_biglet: cmpb r1 ,#LARGEA ; lower case letter? blssu 20$ ; no cmpb r1 ,#LARGEZ ; perhaps, is it in range bgtru 20$ ; no movzbl #1 ,r0 ; yes, return(1) rsb 20$: clrl r0 ; no, return failure rsb .end