.title lex ; ;**************************************************************************** ;* * ;* Copyright (c) 1980, 1981 * ;* by DIGITAL Equipment Corporation, Maynard, Mass. * ;* * ;* This software is furnished under a license and may be used and copied * ;* only in accordance with the terms of such license and with the * ;* inclusion of the above copyright notice. This software or any other * ;* copies thereof may not be provided or otherwise made available to any * ;* other person. No title to and ownership of the software is hereby * ;* transferred. * ;* * ;* The information in this software is subject to change without notice * ;* and should not be construed as a commitment by DIGITAL Equipment * ;* Corporation. * ;* * ;* DIGITAL assumes no responsibility for the use or reliability of its * ;* software on equipment which is not supplied by DIGITAL. * ;* * ;**************************************************************************** ;++ ; facility: ; ; SDL (Structure Definition Language) ; ; abstract: ; ; The lexical analyzer called by the PAT parser. It returns a token ; id, string, and string length in the common structure TOKEN ; ; author: ; c.t. pacy ; date: ; revised 22-DEC-1981 ctp ; revised Oct-1983 kd pass shareable structure - sdl$_shr_data ; revised 21-Nov-1984 kd Add TYPE keyword ; ; C H A N G E L O G ; ; Date | Name | Description ;_______________|_______|______________________________________________________ ; 8-Aug-1985 | kd | 2-1 Zero out the curline upon eof. ;_______________|_______|______________________________________________________ ; 12-Aug-1985 | kd | 2-2 Don't zero out curline for include files. ;_______________|_______|______________________________________________________ ; 22-Mar-1986 | pc | 2-3 Added new keyword LIST ;_______________|_______|______________________________________________________ ; 20-Mar-1987 | jw | 2-4 (SDL T3.1) Added new keyword RTL_STR_DESC ;_______________|_______|______________________________________________________ ; 2-Apr-1987 | jw | X3.1-0 Bumped version edit level and switched to X ;_______________|_______|______________________________________________________ ; 7-Apr-1987 | jw | X3.1-1 Added new keyword COMPLEX ;_______________|_______|______________________________________________________ ; 10-Jun-1987 | pc | X3.1-2 Fix for bug 76 ;_______________|_______|______________________________________________________ ; 4-Jan-1988 | PG | X3.2-0 Add STRING keyword ;_______________|_______|______________________________________________________ ; 14-Jan-1988 | PG | X3.2-1 Fix " bug ;_______________|_______|______________________________________________________ ; 15-Jan-1988 | jg | X3.2-2 Add TYPEDEF keyword ;_______________|_______|______________________________________________________ ; 18-Jan-1988 | jg | X3.2-3 Add VOID and SIZEOF keywords ;_______________|_______|______________________________________________________ ; 22-Jan-1988 | jg | X3.2-4 Add DECLARE keyword ;_______________|_______|______________________________________________________ ; 15-Feb-1988 | jg | X3.2-5 Add IFLANGUAGE, END_IFLANGUAGE, LITERAL, ; | | END_LITERAL keywords. ; | | Create lisbuf for listing line. ;_______________|_______|______________________________________________________ ; 26-Feb-1988 | jg | X3.2-6 Add READ keyword. ;_______________|_______|______________________________________________________ ; 26-Feb-1990 | lw | X3.2-7 Add IFSYMBOL, END_IFSYMBOL keywords. ; | | Add INTEGER data type. ;_______________|_______|______________________________________________________ ; 31-Oct-1991 | AWF | EV1-0 Added ALIGN and NOALIGN keywords. ;_______________|_______|______________________________________________________ ; 28-Nov-1991 | RMM | EV1-1 Added $CODE PSECT ; | | Changed CALLS #2 to GETLINE to #3 ; | | Added branch stepping stone ; | | All that to make it VESTable for Alpha ;_______________|_______|______________________________________________________ ; 20-May-1992 | JAK | EV1-10 Merged EV1-0 and EV1-1 changes. ; | | Corrected references to listline to longword. ;_______________|_______|______________________________________________________ ; 21-Jan-1993 | JAK | EV1-20 Added keyword BASEALIGN. ;_______________|_______|______________________________________________________ ; 8-Feb-1993 | JAK | EV1-21 Added keywords for ELSE, ELSE_IFSYMBOL, ELSE_IFLANGUAGE. ;_______________|_______|______________________________________________________ ;-- .IDENT /EV1-20/ $tokdef $semdef $MISCELLANY ; ; keywords ; keywordlen=20 .save .psect keytable noexe,lcl,con keytable: .psect keytoken noexe,lcl,con keytoken: .restore .macro keyword nam,?l1 .enable lsb .save .psect keytable noexe,lcl,con l1: .ascii /\'nam/ siz=keywordlen-<.-l1> .if lt siz .warn ; Keyword nam is too long for table. .endc .rept siz .byte 0 .endr .psect keytoken noexe,lcl,con .byte t_'nam .restore .disable lsb .endm keyword KEYWORD AGGREGATE KEYWORD CONSTANT KEYWORD END KEYWORD END_MODULE KEYWORD ENTRY KEYWORD ITEM KEYWORD MODULE KEYWORD EQUALS KEYWORD STRING ; PG KEYWORD COMMON KEYWORD DIMENSION KEYWORD PREFIX KEYWORD TAG KEYWORD INCREMENT KEYWORD PARAMETER KEYWORD RETURNS KEYWORD VALUE KEYWORD GLOBAL KEYWORD VARIABLE KEYWORD ADDRESS KEYWORD BOOLEAN KEYWORD BYTE KEYWORD CHARACTER KEYWORD DECIMAL KEYWORD D_FLOATING KEYWORD F_FLOATING KEYWORD G_FLOATING KEYWORD H_FLOATING KEYWORD LONGWORD KEYWORD OCTAWORD KEYWORD QUADWORD KEYWORD BITFIELD KEYWORD WORD KEYWORD STRUCTURE KEYWORD UNION KEYWORD ANY KEYWORD LENGTH KEYWORD MASK KEYWORD PRECISION KEYWORD UNSIGNED KEYWORD VARYING KEYWORD ORIGIN KEYWORD DESCRIPTOR KEYWORD RTL_STR_DESC KEYWORD COMPLEX KEYWORD COUNTER KEYWORD IN KEYWORD OUT KEYWORD LIST KEYWORD NAMED KEYWORD IDENT KEYWORD BASED KEYWORD FILL KEYWORD ALIAS KEYWORD DEFAULT KEYWORD REFERENCE KEYWORD LINKAGE KEYWORD INCLUDE KEYWORD OPTIONAL KEYWORD SIGNED KEYWORD MARKER KEYWORD TYPENAME KEYWORD TYPEDEF ; jg KEYWORD VOID ; jg KEYWORD SIZEOF ; jg KEYWORD DECLARE ; jg KEYWORD IFLANGUAGE ; jg KEYWORD END_IFLANGUAGE ; jg KEYWORD LITERAL ; jg KEYWORD END_LITERAL ; jg KEYWORD READ ; jg KEYWORD INTEGER ; lw KEYWORD IFSYMBOL ; lw KEYWORD END_IFSYMBOL ; lw KEYWORD HARDWARE_ADDRESS ; lw KEYWORD HARDWARE_INTEGER ; lw KEYWORD POINTER_HW KEYWORD POINTER_LONG KEYWORD POINTER KEYWORD POINTER_QUAD KEYWORD INTEGER_BYTE KEYWORD INTEGER_WORD KEYWORD INTEGER_LONG KEYWORD INTEGER_QUAD KEYWORD INTEGER_HW KEYWORD ALIGN ; awf KEYWORD NOALIGN ; awf KEYWORD BASEALIGN ; jak KEYWORD ELSE KEYWORD ELSE_IFSYMBOL KEYWORD ELSE_IFLANGUAGE .save .psect keytable noexe,lcl,con keytablelen=.-keytable .restore white_space=1 alpha=2 digit=4 punctuation=8 bindig=16 octdig=32 decdig=64 hexdig=128 scantbl: .rept 33 .byte white_space .endr .byte punctuation ; ! .byte 0 ; " .byte 0 ; # .byte alpha ; $ .byte 0 ; % .byte punctuation ; & .byte 0 ; ' .byte punctuation ; ( .byte punctuation ; ) .byte punctuation ; * .byte punctuation ; + .byte punctuation ; , .byte punctuation ; - .byte punctuation ; . .byte punctuation ; / .byte digit!bindig!octdig!decdig!hexdig ;0 .byte digit!bindig!octdig!decdig!hexdig ;1 .rept 6 .byte digit!octdig!decdig!hexdig ;2-7 .endr .byte digit!decdig!hexdig ;8 .byte digit!decdig!hexdig ;9 .byte punctuation ; : .byte punctuation ; ; .byte 0 ; < .byte punctuation ; = .byte 0 ; > .byte 0 ; ? .byte punctuation ; @ .rept 6 .byte alpha!hexdig ;A-F .endr .rept 20 .byte alpha ; G-Z .endr .byte 0 ; [ .byte 0 ; \ .byte 0 ; ] .byte punctuation ; ^ .byte alpha ; _ .byte 0 ; ` .rept 6 .byte alpha!hexdig ; a-f .endr .rept 20 .byte alpha ;g-z .endr .byte 0 ; { .byte 0 ; | .byte 0 ; } .byte 0 ; ~ .rept 129 .byte 0 .endr punctbl: .byte t_exclamation ; ! .byte 0,0,0,0 ; ",#,$,% .byte t_ampersand ; & .byte 0 ; ' .byte t_l_paren ; ( .byte t_r_paren ; ) .byte t_star ; * .byte t_plus ; + .byte t_comma ; , .byte t_minus ; - .byte t_dot ; . .byte t_slash ; / .rept 10 ; ascii 48-57 .byte 0 .endr .byte t_colon ; : .byte t_semicolon ; ; .byte 0 ; < .byte t_equal ; = .byte 0,0 ; >,? .byte t_at ; @ .rept 29 ; ascii 65-93 .byte 0 .endr .byte t_circumflex ; ^ upcasetbl: .byte 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20 .byte 21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40 .byte 41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60 .byte 61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80 .byte 81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96 .BYTE ^A/A/,^A/B/,^A/C/,^A/D/,^A/E/,^A/F/,^A/G/,^A/H/,^A/I/,^A/J/ .BYTE ^A/K/,^A/L/,^A/M/,^A/N/,^A/O/,^A/P/,^A/Q/,^A/R/,^A/S/,^A/T/ .BYTE ^A/U/,^A/V/,^A/W/,^A/X/,^A/Y/,^A/Z/ .byte 123,124,125,126,127 tokovf: .byte 0 desc: .word 4 .ascii /1024/ ; ; the following must be adjacent bytes_left: .long 0 curpos: .long 0 ; the above must be adjacent init_length: .long 0 curline: .long 0 pushed_infile: .blkl 1 pushed_bytes_left: .blkl 1 pushed_curpos: .blkl 1 pushed_buf: .blkb 80 ; ; ; the following must be adjacent buflen: .word 0 inbuf: .blkl 1024 ; ; the above must be adjacent ; ; the following must be adjacent ; jg lisbuflen: ; jg .word 0 ; jg lisbuf: ; jg .blkl 1024 ; jg ; ; jg ; the above must be adjacent ; jg lisbufdesc: ; jg .long 0 .address lisbuf ; jg line_length=132 ; ; radsym: .byte ^a/D/ .byte ^a/X/ .byte ^a/B/ .byte ^a/O/ ; raddig: .byte decdig .byte hexdig .byte bindig .byte octdig ; sdl_comment_char = ^a/{/ ; .psect $code,pic,usr,con,rel,lcl,shr,exe,rd,nowrt,novec .entry lex ^m movl @4(AP), r11 ; get the shared structure incl token_index ; get next token element cmpl #10,token_index bgtr 1$ clrl token_index 1$: mull3 #token_size,token_index,r10 movab token[r10],r10 movab token_string(r10),token_address(r10) clrb start_line(r10) clrb synthetic(r10) movl curline,lineno(r10) tstl bytes_left ; see if any more line left bgtr 10$ ; if so, br 5$: pushab lisbuf-2 ; jg pushab inbuf-2 ; get a new line pushl r11 ; first argument is the shared structure calls #3,sdl$getline ; (returns length read in r0) incl curline ; incr. src line # movl curline,lineno(r10) movb #1,start_line(r10) ; set beg. of line bit movl r0,bytes_left ; returned length=bytes left movl r0,init_length bgeq 7$ ; if not eof, br 6$: movzbl #t_eof,token_id(r10) ; return eof token clrl token_length(r10) ; zero token length tstl file_level(r11) ; see if we are processing an incl. file bgtr 11$ ; if so, don't zero current line clrl curline ; zero the current line no. 11$: ret 7$: movab inbuf,curpos ; reset current pos. ptr bbc #sdl$v_listing_opt,sdl$gl_options(r11),10$ ; if no listing, br ; ; write line to listing movl lisbuflen,lisbufdesc ; jg cvtwl lineno(r10),listline(r11) pushl r11 ; the first arg. is the shared structure pushab lisbufdesc ; jg calls #2,sdl$writelist ; 10$: spanc bytes_left,@curpos,scantbl,#white_space ; span over any white space bneq 1001$ ; if nothing there get new line 1000$: brw 5$ ; stepping stone to 5$ 1001$: movzbl (r1),r2 ; get ascii byte and use for index cmpb r2,#sdl_comment_char ; throw out sdl-local comments beql 1000$ movq r0,r7 ; save volatile regs bitb #alpha,(r2)[r3] ; see if we have an alpha char beql 30$ ; if not, br spanc r0,(r1),scantbl,#alpha!digit ; span over identifier chars movq r0,bytes_left ; reset bytes left,current pos subl3 r0,r7,r6 ; get size of token movtc r6,(r8),#0,upcasetbl,#keywordlen,token_string(r10) ; copy and upper case the token movb #^a!\!,token_string-1(r10) ; put backslash in front ; (this is token_length field) matchc #keywordlen,token_string-1(r10),#keytablelen,keytable ; see if we have a keyword bneq 28$ ; if not, br subl2 #keytable,r3 ; get index into keyword table divl2 #keywordlen,r3 decl r3 movzbl keytoken[r3],token_id(r10) ; get the keyword tok. id movl r6,token_length(r10) ; put in the token size subw3 bytes_left,init_length,colno(r10) ; get col ret 28$: movzbl #t_name,token_id(r10) ; just a regular identifier movc3 r6,(r8),token_string(r10) ;copy the orig. identifier movl r6,token_length(r10) ; put in the token size subw3 bytes_left,init_length,colno(r10) ; put in column ret 30$: cmpb (r1),#^a/#/ ; check for local name bneq 40$ ; if not one, br movl r1,r4 ; save start of token incl r1 ; point past # char decl r0 ; decr. remaining byte count spanc r0,(r1),scantbl,#alpha!digit ; span over identifier chars movzbl #t_local_name,token_id(r10) ; set local name token movq r0,bytes_left ; reset bytes_left,current pos subl3 r0,r7,token_length(r10) ; set token length movtc token_length(r10),(r4),#0,upcasetbl,token_length(r10),token_string(r10) ; upcase the token subw3 bytes_left,init_length,colno(r10) ; put in column ret 40$: clrl r5 ; set decimal radix default cmpb (r1),#^a/%/ ; check for radix code beql 42$ ; if got one, br bitb #digit,(r2)[r3] ; see if just digits bneq 46$ ; if so, br brw 50$ ; else continue looking 42$: incl r1 ; point past % decl r0 movzbl (r1)+,r9 ; get the radix letter decl r0 ; point past it bicb #^x20,r9 ; upcase the letter cmpb r9,#^a/A/ ; check for ascii escape bneq 44$ ; if not continue checking decl r0 ; any character here goes incl r1 brb 48$ 44$: cmpb radsym[r5],r9 ; go thru table to find the letter beql 46$ aoblss #4,r5,44$ ; if invalid radix code, then look for some white space delimiter ; and send it all back as a constant (however invalid -- let action routines ; handle it) scanc r0,(r1),scantbl,#white_space!punctuation brb 48$ 46$: spanc r0,(r1),scantbl,raddig[r5] ; span to end of constant 48$: movzbl #t_numeric,token_id(r10) ; numeric token id subl3 r0,r7,token_length(r10) ; set token length movq r0,bytes_left ; reset bytes left,current pos movc3 token_length(r10),(r8),token_string(r10) ; copy token subw3 bytes_left,init_length,colno(r10) ; put in column ret 50$: cmpw (r1),#^a?/*? ; check for comment bneq 55$ ; if not one, br subl3 #2,r0,r5 ; check bytes left blss 55$ movl r5,r0 addl #2,r1 ; point past comment chars movl r0,token_length(r10) ; set length movc3 r0,(r1),token_string(r10) ; copy to token string movzbl #t_comment,token_id(r10) ; comment token type movq r0,bytes_left ; reset bytes left, current pos subw3 bytes_left,init_length,colno(r10) ; put in column ret 55$: bitb #punctuation,(r2)[r3] ; see if punctuation char beql 60$ ; if not, br subl2 #^a/!/,r2 ; index in punctuation token id table movzbl punctbl[r2],token_id(r10) ; punctuation token id movb (r1)+,token_string(r10) ; copy token movl r1,curpos ; reset current pos movzbl #1,token_length(r10) ; set token size subl3 #1,r0,bytes_left ; reset bytes_left subw3 bytes_left,init_length,colno(r10) ; put in column ret 60$: cmpb (r1),#^a/"/ ; check for quoted string beql 61$ brw 100$ ; if not quote, then br 61$: clrl token_length(r10) ; clr the tok length clrb tokovf ; clear token overflow indicator movab token_string(r10),r8 ; setup token string ptr 65$: incl r1 ; point past quote decl r0 movq r0,bytes_left ; reset bytes left, current pos 70$: locc #^a/"/,r0,(r1) ; look for closing quote subl3 r0,bytes_left,r6 ; get length of this token piece addl r6,token_length(r10) ; add to accumulated token length cmpl token_length(r10),#maxtoksiz ; see if token too big bgtr 75$ ; if so, br movq r0,-(sp) ; save remaining bytes, current pos movc3 r6,@curpos,(r8) ; append this string to token movl r3,r8 ; reset token string ptr movq (sp)+,r0 ; restore r0,r1 brb 80$ 75$: movl #maxtoksiz,token_length(r10) ; reset token length to max blbs tokovf,80$ ; if we already put out a msg, br movq r0,-(sp) ; save volatile regs pushl #sdl$_tokovf pushal desc pushaw lineno(r10) pushal 8(sp) pushl r11 ; the sdl shared structure is the 1st arg. calls #4,errmsg ; print error msg addl #4,sp movq (sp)+,r0 ; restore volatile regs bisb #1,tokovf 80$: movl r1,curpos ; reset current pos to terminating byte movl r0,bytes_left ; if 0, then no " found bneq 90$ ; if stopped by a closing ", then br pushab lisbuf-2 ; jg pushab inbuf-2 ; else, call for a new line pushl r11 ; the first argument is the shared structure calls #3,sdl$getline ; (returns length read in r0) tstl r0 ; check for eof return bgeq 85$ ; if ok, br movzbl #t_eof,token_id(r10) ; else, return eof token (error) clrl token_length(r10) ret 85$: bbc #sdl$v_listing_opt,sdl$gl_options(r11),86$ ; if no listing, br pushl r0 ; write the listing movl lisbuflen,lisbufdesc ; jg cvtwl lineno(r10),listline(r11) pushl r11 ; the first arg. for writelist is the shared structure pushab lisbufdesc ; jg calls #2,sdl$writelist popl r0 86$: movab inbuf,r1 ; reset current byte movq r0,bytes_left ; reset saved length & pos brw 70$ ; and keep looking 90$: cmpl #1,r0 ; check if last on line beql 95$ ; if so, it's close cmpb #^a/"/,1(r1) ; check for double quotes bneq 95$ ; if not found, that's it incl r1 ; point past quote PG decl r0 ; PG movq r0,bytes_left ; reset bytes left, current pos PG movb #^a/"/,(r8)+ ; Add a " to output token PG incl token_length(r10) ; Bump the token length PG brw 65$ ; else keep looking for close 95$: addl3 #1,r1,curpos ; reset current pos past closing " subl3 #1,r0,bytes_left movzbl #t_string_literal,token_id(r10) ; string token PG subw3 bytes_left,init_length,colno(r10) ; put in column ret 100$: movzbl #t_errormark,token_id(r10) ; return error token movzbl #1,token_length(r10) ; length of char movb (r1),token_string(r10) ; the token itself addl3 #1,r1,curpos ; reset current position subl3 #1,r0,bytes_left ; reset bytes left subw3 bytes_left,init_length,colno(r10) ; put in column ret ; routines for pushing and poping input file state ; (initially just one level) .entry set_incl_text ^m<> ; movl infile,pushed_infile ; movl incl_file,infile movl bytes_left,pushed_bytes_left movl curpos,pushed_curpos movc3 #80,inbuf,pushed_buf clrl bytes_left clrl curpos ret .entry reset_incl_text ^m<> movl pushed_bytes_left,bytes_left movl pushed_curpos,curpos movc3 #80,pushed_buf,inbuf ; movl pushed_infile,infile ret .end