flat assembler
Message board for the users of flat assembler.
Index
> High Level Languages > match(pattern, source) in C |
Author |
|
soul_master 18 Jul 2023, 02:59
An example match(pattern, source) function written in C. It works like FASM's match directive, except that it uses the rarely used ` (backtick) symbol to mark a literal match ("`name"), and the = and , characters are not special. Example:
Code: match("any", " "); match("`byte a=b", "byte n=1*2+3"); match("`mov a, [b]", "mov eax, [esi+ecx*4]"); match("type name[size]={value}", "int a[4]={5,6,7,8}") See attachment > MATCH.C/H. *.BAT requires TinyCC to compile and run. Download TinyC compiler + Notepad++ editor (4mb *.zip): https://github.com/starpow3r/tcc Click [<> Code] > Download .ZIP. Setup: Just copy /TCC/ folder to C:/. Target: C:/TCC/TCC.EXE. Any improvements, bug reports, or comments are welcome. Example: Code: // MATCH EXAMPLE #include "match.h" void test(), test_match(); int main() { test_match(); printf("Success!\n\n"); system("output.txt"); getch(); return 0; } void test_match() { if (!create_file("output.txt")) { source_error("Error (test_match): " \ "create_file(output)"); return; } log("Test match(pattern, source). " \ "If true: matches[#].name=value\n\n"); text pattern="type name[size]={value}", source="int a[4]={5,6,7,8}"; log("Example: match(\"%s\", \"%s\")...\n\n", pattern, source); if (match(pattern, source)) { for (int i=0; i<n_matches; i++) log("#%d: %s=%s, matches[%d].name=%s, " \ "matches[%d].value=%s\n", i+1, matches[i].name, matches[i].value, i, matches[i].name, i, matches[i].value); } log("\n"); test(" ", " "); test(" ", "hi"); test("hi", " "); test("any", "x"); test("any", " "); test("x", "y"); test("a+b", "1+2*3"); test("a=b", "i=1+2-3*4"); test("a", "p[1+2-3*4]"); test("a+b", "1+2-3*4"); test("a*b", "1+2-3*4"); test("a-b", "1+2-3*4"); test("a+b*c", "x*y"); test("`get", "get"); test("`int n", "int"); test("`uint n", "uint size"); test("`text t", "text a,b,c;"); test("`byte a=b", "byte n=1*2+3"); test("`mov a, [b]", "mov eax, [esi+ecx*4]"); test("v=f(p)", "c=rgb(r,g,b)"); log("\n"); close_file(); } void test(text a, text b) { int m=match(a, b); log("match(\"%s\", \"%s\") = %s", a, b, m ? 
"True":"False"); if (m) { if (n_matches) log(" (%d):", n_matches); for (int i=0; i<n_matches; i++) { log(" #%d: %s=%s", i+1, matches[i].name, matches[i].value); if (i<n_matches-1) log(","); } } log("\n"); } Output: Code: Test match(pattern, source). If true: matches[#].name=value Example: match("type name[size]={value}", "int a[4]={5,6,7,8}")... #1: type=int, matches[0].name=type, matches[0].value=int #2: name=a, matches[1].name=name, matches[1].value=a #3: size=4, matches[2].name=size, matches[2].value=4 #4: value=5,6,7,8, matches[3].name=value, matches[3].value=5,6,7,8 match(" ", " ") = True match(" ", "hi") = False match("hi", " ") = False match("any", "x") = True match("any", " ") = False match("x", "y") = True (1): #1: x=y match("a+b", "1+2*3") = True (2): #1: a=1, #2: b=2*3 match("a=b", "i=1+2-3*4") = True (2): #1: a=i, #2: b=1+2-3*4 match("a", "p[1+2-3*4]") = True (1): #1: a=p[1+2-3*4] match("a+b", "1+2-3*4") = True (2): #1: a=1, #2: b=2-3*4 match("a*b", "1+2-3*4") = True (2): #1: a=1+2-3, #2: b=4 match("a-b", "1+2-3*4") = True (2): #1: a=1+2, #2: b=3*4 match("a+b*c", "x*y") = False match("`get", "get") = True match("`int n", "int") = False match("`uint n", "uint size") = True (1): #1: n=size match("`text t", "text a,b,c;") = True (1): #1: t=a,b,c; match("`byte a=b", "byte n=1*2+3") = True (2): #1: a=n, #2: b=1*2+3 match("`mov a, [b]", "mov eax, [esi+ecx*4]") = True (2): #1: a=eax, #2: b=esi+ecx*4 match("v=f(p)", "c=rgb(r,g,b)") = True (3): #1: v=c, #2: f=rgb, #3: p=r,g,b Helper code: #defines, typedefs, text, conversions. Code: typedef unsigned uint; typedef char *text; #define and && #define or || #define not ! 
#define KB 1024 #define object typedef struct #define allocate(t,p,n) (p=(t)malloc(n)) #define destroy(p) free(p), p=0 #define memory_zero(p, n) memset(p, 0, n) // number conversions text u2t(uint n, text t) { text p=t; if (!n) { *t++='0', *t=0; return t; } while (n) *p++=(n%10)+'0', n/=10; *p=0; text_reverse(t); return p; } text i2t(int n, text t) { if (n<0) *t++='-', n=-n; return u2t(n, t); } uint t2u(text t) { uint n; for (n=0; *t; t++) n=(n*10)+(*t-'0'); return n; } int t2i(text t) { int n=0, sign=0; if (*t=='-') sign=1, t++; n=t2u(t); if (sign) n=-n; return n; } uint t2h(text t) { uint n, c, x; for (n=0; *t; t++) { c=*t; if (c>='0' and c<='9') x=(c-'0'); else if (c>='a' and c<='f') x=(c-'a')+10; else if (c>='A' and c<='F') x=(c-'A')+10; else break; n=(n*16)+x; } return n; } uint t2b(text t) { uint n; for (n=0; *t; t++) n=(n*2)+(*t-'0'); return n; } Is character of type? Code: // is character of type? dot . is a symbol (is_symbol), // and a name may contain it (is_name_c, "my.name"), // using . as a separator, but names can't begin with . // (is_name is letter or _) because this ".name" syntax // is reserved for local labels, "namespaces", virtual // relative offsets inside structures #define is_number(c) (c>='0' and c<='9') #define is_upper(c) (c>='A' and c<='Z') #define is_lower(c) (c>='a' and c<='z') #define is_alpha(c) (is_upper(c) or is_lower(c)) #define is_alpha_n(c) (is_number(c) or is_alpha(c)) #define is_name(c) (is_alpha(c) or c=='_') #define is_name_c(c) (is_alpha_n(c) or c=='_' or c=='.') #define is_space(c) (c==' ' or c==9) #define is_return(c) (c==0xD or c==0xA) #define is_white(c) (is_space(c) or is_return(c)) #define is_visible(c) (c>=33 and c<=126) #define is_end(c) (!c or is_return(c)) // is_symbol first checks is_visible to minimize // calls to text_find(t, c) which searches the entire // list of symbols. 
begins with common ones: (),.= text c_symbols="(),.=<>-+[]{}'\"!@#$%^&*/;|\\~`"; #define is_symbol(c) \ (is_visible(c) and text_find(c_symbols, c)) #define is_hex_upper(c) (c>='A' and c<='F') #define is_hex_lower(c) (c>='a' and c<='f') #define is_hex_letter(c) (is_hex_upper(c) or is_hex_lower(c)) #define is_hex(c) (is_number(c) or is_hex_letter(c)) #define SLASH_FORWARD '/' #define SLASH_BACKWARD '\\' #define is_slash_f(c) (c==SLASH_FORWARD) #define is_slash_b(c) (c==SLASH_BACKWARD) #define is_slash(c) (is_slash_f(c) or is_slash_b(c)) Get token from source, and advance: Return 0 if end: source=0, *source=0, return, end of line, or if an error occurs. If success, return token_type = T_NAME, T_NUMBER, T_SYMBOL. Code: // copy next token from source. return advanced // address in source, or 0 if end/!source/*source=0 int get_token() { int i=0, c=0, v=0, error=0; // allocate token if necessary if (!setup_token()) return 0; token[0]=0; token_type=T_END; // source address=0 or end? text p=source; if (!p or !*p) { if (!p) source_error("Error (get_token): " \ "Source address=0"); return 0; } // skip all preceding whitespace and comments if (parse_type&PARSE_SKIP_ALL) // all + comments p=skip_all(p); else if (parse_type&PARSE_SKIP_WHITE) // spaces and returns p=skip_white(p); else if (parse_type&PARSE_SKIP_SPACE) // only spaces before p=skip_space(p); if (!*p) // end source? return 0; // is number? decimal, hexadecimal, binary c=*p; if (is_number(c)) { // number: 0... token_type=T_NUMBER; if (*p=='0' and // hexadecimal prefix: 0x7F (p[1]=='x' or p[1]=='X')) { for (i=0, p+=2; is_hex(*p) and i<16; token[i++]=*p++); if (i<16) token[i]=0, v=t2h(token); else error=16; } else { // number: 123. no prefix for (i=0; is_number(*p) and i<10; token[i++]=*p++); token[i]=0; if (*p=='b') { // binary suffix: 1101b p++, v=t2b(token); error=32; } else if (*p=='h') { // hexadecimal suffix: 7Fh p++, v=t2h(token); error=8; } else { // decimal: 123. 
no suffix v=t2u(token); error=10; } if (i>error) { source_error("Error (get_token): " \ "Value exceeds maximum length (%d)", error); return token_type=0; } } i2t(v, token); token_value_i=v; token_value_type=T_NUMBER; } else if (is_name(c)) { token_type=T_NAME; error=NAME_LENGTH; for (i=0; is_name_c(*p) and i<error; token[i++]=*p++); token[i]=0; if (i==error) { source_error("Error (get_token): " \ "Name exceeds maximum length (%d)", NAME_LENGTH); return token_type=0; } } else if (is_symbol(c)) { token_type=T_SYMBOL; token[0]=*p++, token[1]=0; } else { source_error("Error (get_token): " \ "Invalid character: %d/%02xh/'%c'", c, c, c); token_type=0; } source=p; return token_type; } Match structure and array. 8 maximum. 1k each. Code: // match structures: 8k #define MATCH_SIZE 1024 #define N_MATCHES 8 #define VALUE_SIZE (MATCH_SIZE-NAME_LENGTH) object { char name[NAME_LENGTH], value[VALUE_SIZE]; } MATCH; MATCH *matches=0; int n_matches=0; int setup_match() { if (matches) return 1; n_matches=0; if (!allocate(MATCH *, matches, N_MATCHES*sizeof(MATCH))) { source_error("Error (setup_match): " \ "Memory allocation failed"); return 0; } return 1; } Match function. Return 0 if no match. If success, return 1, and matches[] contains the names and values. Code: // match(pattern, source)... // match pattern in source. extract tokens // from expression, and divide it into sections. // return 1 if success, and assign matches[n_matches] // structures for each variable name in pattern. // uses rarest ` symbol for literal match int match(text pattern, text source) { text p=pattern, s=source, m=0; int i, c, exact, type1, type2; char t1[256], t2[256]; // allocate matches if necessary if (!setup_match()) return 0; // initialize matches MATCH *mp=&matches[0]; memory_zero(mp, N_MATCHES*sizeof(MATCH)); m=mp->value, n_matches=0; if (!p or !s) { source_error("Error (match): Address=0"); return 0; } // skip spaces p=skip_space(p), s=skip_space(s); // "any" keyword? not empty/nothing? 
// true if source contains something // match("any", "x") = true // match("any", " ") = false if (text_equal(p, "any")) { if (*s and not is_return(*s)) return 1; return 0; } // match(pattern, source) if (is_end(*p)) { // end pattern? if (is_end(*s)) // end source? return 1; // true, finished. return 0; // false, source ends } // before pattern // scroll through pattern and source while (1) { p=skip_space(p); // skip spaces s=skip_space(s); if (is_end(*s)) { // end source? if (is_end(*p)) // end pattern? return 1; // true, finished. return 0; // false, source ends } // before pattern. if (is_end(*p)) // end pattern? return 1; // finished exact=0; // literal `match if (*p=='`') // skip ` exact=1, p++; // get next token from pattern and source: t1, t2. // note: get_token returns 0 if end/!*p p=get_token_from(t1, p), type1=token_type; s=get_token_from(t2, s), type2=token_type; if (!s) // end source? return 0; // false if (!p) // end pattern? return 1; // finished p=skip_space(p), s=skip_space(s); // literal `match if (exact) { if (text_equal(t1, t2)) // true continue; return 0; // false } // variable in pattern? get next match name/value if (type1==T_NAME) { if (n_matches>=8) { source_error("Error (match): " \ "Matches exceed maximum (%d)", n_matches); return 0; } mp=&matches[n_matches++]; // get next match text_copy(mp->name, t1); // name and value (m). m=mp->value; // copy initial value m=text_copy(m, t2); // and advance to end if (is_end(*s)) { // end source? if (is_end(*p)) // end pattern? return 1; // true, both end return 0; // false, source end } } // symbol in pattern? +-* append source token (t2) // to current matches[].value (m), and advance else if (type1==T_SYMBOL) { c=t1[0]; if (type2==T_SYMBOL and c==t2[0]) continue; if (is_end(*s)) // end source? return 0; // symbol not found m=text_attach(m, t2); // attach value // copy characters from source to current // matches[].value until symbol is encountered. 
// note: may support line continuation with \, // or if ends with certain symbols like =,([{ // which always continues while (*s!=c and not is_end(*s)) *m++=*s++; *m=0; if (is_end(*s)) // end source? return 0; // false, symbol not found s++; // matched, skip } if (is_end(*s)) { // end source? if (is_end(*p)) // end pattern? return 1; // true, both end return 0; // false, source end } if (is_end(*p)) { // end pattern? s=skip_space(s); // advance to end m+=text_n(m); // success; copy remaining characters from // source until end *s=0 or return, end of line. // attach to matches[].value, then return while (not is_end(*s)) *m++=*s++; *m=0; return 1; // true } } source_error("Error (match): " \ "Invalid value"); return 0; }
|
|||||||||||
18 Jul 2023, 02:59 |
|
< Last Thread | Next Thread > |
Forum Rules:
|
Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.
Website powered by rwasa.