flat assembler
Message board for the users of flat assembler.

Index > High Level Languages > match(pattern, source) in C

Author
Thread Post new topic Reply to topic
soul_master



Joined: 30 Jun 2023
Posts: 24
soul_master 18 Jul 2023, 02:59
An example match(pattern, source) function written in C. It works like FASM's match, except that it uses the rarely used ` symbol to mark a literal match ("`name"), and the = and , characters are not special. Example:

Code:
// Each call returns 1 when source matches pattern, 0 otherwise.
match("any", " ");
match("`byte a=b", "byte n=1*2+3");
match("`mov a, [b]", "mov eax, [esi+ecx*4]");
match("type name[size]={value}", "int a[4]={5,6,7,8}");


See the attachment for MATCH.C/H. The *.BAT script requires TinyCC to compile and run. Download the TinyC compiler + Notepad++ editor (4 MB *.zip) from https://github.com/starpow3r/tcc by clicking [<> Code] > Download ZIP. Setup: just copy the /TCC/ folder to C:/, so that the compiler is at C:/TCC/TCC.EXE.

Any improvements, bug reports, or comments are welcome.

Example:
Code:
// MATCH EXAMPLE

#include "match.h"

// Full prototypes, so the compiler checks the arguments of every call.
// `char *` is the type behind the `text` typedef used by the definition.
void test(char *a, char *b);
void test_match(void);

// Entry point: run the match() demo, print a confirmation, hand
// "output.txt" to the command processor and wait on getch() before exiting.
int main() {
  test_match();                   // creates output.txt and logs the results
  fputs("Success!\n\n", stdout);
  system("output.txt");
  getch();
  return 0;
}

void test_match() {
  if (!create_file("output.txt")) {
    source_error("Error (test_match): " \
      "create_file(output)");
    return;
  }
  log("Test match(pattern, source). " \
    "If true: matches[#].name=value\n\n");
  text pattern="type name[size]={value}",
    source="int a[4]={5,6,7,8}";
  log("Example: match(\"%s\", \"%s\")...\n\n",
    pattern, source);

  if (match(pattern, source)) {
    for (int i=0; i<n_matches; i++)
      log("#%d: %s=%s, matches[%d].name=%s, " \
        "matches[%d].value=%s\n",
        i+1, matches[i].name, matches[i].value,
        i, matches[i].name, i, matches[i].value);
  }
  log("\n");
  test("    ", " ");
  test(" ", "hi");
  test("hi", " ");
  test("any", "x");
  test("any", "  ");
  test("x", "y");
  test("a+b", "1+2*3");
  test("a=b", "i=1+2-3*4");
  test("a", "p[1+2-3*4]");
  test("a+b", "1+2-3*4");
  test("a*b", "1+2-3*4");
  test("a-b", "1+2-3*4");
  test("a+b*c", "x*y");
  test("`get", "get");
  test("`int n", "int");
  test("`uint n", "uint size");
  test("`text t", "text a,b,c;");
  test("`byte a=b", "byte n=1*2+3");
  test("`mov a, [b]", "mov eax, [esi+ecx*4]");
  test("v=f(p)", "c=rgb(r,g,b)");
  log("\n");
  close_file();
}

// Run match(a, b) and log a single result line: the call itself, True or
// False, and, when it matched with captures, the count followed by each
// pair as " #N: name=value", separated by commas.
void test(text a, text b) {
  int matched=match(a, b);
  text verdict="False";
  int k;

  if (matched)
    verdict="True";
  log("match(\"%s\", \"%s\") = %s", a, b, verdict);

  if (matched && n_matches) {
    log(" (%d):", n_matches);
    for (k=0; k<n_matches; k++) {
      if (k)            // separator goes before every pair but the first
        log(",");
      log(" #%d: %s=%s", k+1,
        matches[k].name, matches[k].value);
    }
  }
  log("\n");
}    


Output:
Code:
Test match(pattern, source). If true: matches[#].name=value

Example: match("type name[size]={value}", "int a[4]={5,6,7,8}")...

#1: type=int, matches[0].name=type, matches[0].value=int
#2: name=a, matches[1].name=name, matches[1].value=a
#3: size=4, matches[2].name=size, matches[2].value=4
#4: value=5,6,7,8, matches[3].name=value, matches[3].value=5,6,7,8

match("    ", " ") = True
match(" ", "hi") = False
match("hi", " ") = False
match("any", "x") = True
match("any", "  ") = False
match("x", "y") = True (1): #1: x=y
match("a+b", "1+2*3") = True (2): #1: a=1, #2: b=2*3
match("a=b", "i=1+2-3*4") = True (2): #1: a=i, #2: b=1+2-3*4
match("a", "p[1+2-3*4]") = True (1): #1: a=p[1+2-3*4]
match("a+b", "1+2-3*4") = True (2): #1: a=1, #2: b=2-3*4
match("a*b", "1+2-3*4") = True (2): #1: a=1+2-3, #2: b=4
match("a-b", "1+2-3*4") = True (2): #1: a=1+2, #2: b=3*4
match("a+b*c", "x*y") = False
match("`get", "get") = True
match("`int n", "int") = False
match("`uint n", "uint size") = True (1): #1: n=size
match("`text t", "text a,b,c;") = True (1): #1: t=a,b,c;
match("`byte a=b", "byte n=1*2+3") = True (2): #1: a=n, #2: b=1*2+3
match("`mov a, [b]", "mov eax, [esi+ecx*4]") = True (2): #1: a=eax, #2: b=esi+ecx*4
match("v=f(p)", "c=rgb(r,g,b)") = True (3): #1: v=c, #2: f=rgb, #3: p=r,g,b    


Helper code: #defines, typedefs, text, conversions.
Code:
// Short type aliases used throughout: uint is unsigned int, text is a
// pointer to a mutable character string.
typedef unsigned uint;
typedef char *text;

// Word spellings of the logical operators.
#define and &&
#define or ||
#define not !
#define KB 1024

// `object { ... } NAME;` declares an untagged struct type called NAME.
#define object typedef struct

// allocate(t,p,n): assign p the result of malloc(n) cast to type t; the
//   expression's value is the new p (0 when malloc fails).
// destroy(p): free p, then reset it to 0.
// memory_zero(p,n): fill n bytes at p with zero.
// Arguments and whole expansions are parenthesized so that each macro
// behaves as one expression wherever it is used (inside ?:, next to other
// operators). p is expanded twice in destroy, so pass it without side effects.
#define allocate(t,p,n) ((p)=(t)malloc(n))
#define destroy(p) (free(p), (p)=0)
#define memory_zero(p, n) memset((p), 0, (n))

// number conversions

// Write n as decimal text at t and terminate it with 0. Returns a pointer
// to that terminator. The caller provides enough room at t; nothing here
// checks the size.
text u2t(uint n, text t) {
  text end=t;

  if (n==0) {
    t[0]='0';
    t[1]=0;
    return t+1;
  }

  // digits are produced least-significant first...
  do {
    *end++=(char)('0'+n%10);
    n/=10;
  } while (n);
  *end=0;

  // ...so the text is handed to text_reverse (defined elsewhere; presumably
  // reverses t in place) before returning
  text_reverse(t);
  return end;
}

// Write n as decimal text at t, with a leading '-' for negative values.
// The digits are produced by u2t, whose return value is passed through.
text i2t(int n, text t) {
  uint magnitude=(uint)n;
  if (n<0) {
    *t++='-';
    // negate in unsigned arithmetic: -n overflows int when n is INT_MIN
    magnitude=0u-magnitude;
  }
  return u2t(magnitude, t);
}

// Convert the decimal digits at the start of t to an unsigned value.
// Conversion stops at the end of the text or at the first character that
// is not a digit (the same rule t2h applies), so stray characters are no
// longer folded into the result. Returns 0 when t does not start with a
// digit. Overflow is not detected; the value wraps as unsigned arithmetic.
uint t2u(text t) {
  uint n=0;
  for (; *t>='0' and *t<='9'; t++)
    n=(n*10)+(uint)(*t-'0');
  return n;
}

// Convert decimal text with an optional leading '-' to an int. The digits
// are converted by t2u; the result is negated when the sign was present.
int t2i(text t) {
  int negative=(*t=='-');
  if (negative)
    t++;               // step over the sign
  int value=t2u(t);
  return negative ? -value : value;
}

// Convert hexadecimal text (0-9, a-f, A-F) to an unsigned value. Stops at
// the end of the text or at the first character that is not a hex digit.
uint t2h(text t) {
  uint value=0;
  while (*t) {
    uint ch=*t++, digit;
    if (ch>='0' and ch<='9')
      digit=ch-'0';
    else if (ch>='a' and ch<='f')
      digit=10+(ch-'a');
    else if (ch>='A' and ch<='F')
      digit=10+(ch-'A');
    else
      break;           // not a hex digit: done
    value=value*16+digit;
  }
  return value;
}

// Convert the binary digits at the start of t to an unsigned value.
// Conversion stops at the end of the text or at the first character that
// is neither '0' nor '1' (the same rule t2h applies to hex digits), so
// other characters are no longer folded into the result. Overflow is not
// detected; the value wraps as unsigned arithmetic.
uint t2b(text t) {
  uint n=0;
  for (; *t=='0' or *t=='1'; t++)
    n=(n*2)+(uint)(*t-'0');
  return n;
}    


Is character of type?
Code:
// is character of type? dot . is a symbol (is_symbol),
// and a name may contain it (is_name_c, "my.name"),
// using . as a separator, but names can't begin with .
// (is_name is letter or _) because this ".name" syntax
// is reserved for local labels, "namespaces", virtual
// relative offsets inside structures

// Character-class tests. Every use of the argument is parenthesized so an
// expression such as `ch & 0x7F` is classified as a whole. The argument is
// still expanded more than once: do not pass expressions with side
// effects (for example *p++).
#define is_number(c) ((c)>='0' and (c)<='9')
#define is_upper(c) ((c)>='A' and (c)<='Z')
#define is_lower(c) ((c)>='a' and (c)<='z')
#define is_alpha(c) (is_upper(c) or is_lower(c))
#define is_alpha_n(c) (is_number(c) or is_alpha(c))
#define is_name(c) (is_alpha(c) or (c)=='_')
#define is_name_c(c) (is_alpha_n(c) or (c)=='_' or (c)=='.')
#define is_space(c) ((c)==' ' or (c)==9)
#define is_return(c) ((c)==0xD or (c)==0xA)
#define is_white(c) (is_space(c) or is_return(c))
#define is_visible(c) ((c)>=33 and (c)<=126)
#define is_end(c) (!(c) or is_return(c))

// is_symbol first checks is_visible to minimize
// calls to text_find(t, c) which searches the entire
// list of symbols. begins with common ones: (),.=

// Every character is_symbol accepts. ':' and '?' are part of the list:
// the posted listing showed the forum's emoticon text in their place,
// which dropped both symbols and added letters instead.
text c_symbols="(),.=<>-+[]{}'\"!@#$%^&*/;:?|\\~`";

// Nonzero when c is a visible character for which text_find(c_symbols, c)
// reports a hit. c is expanded more than once: pass it without side effects.
#define is_symbol(c) \
  (is_visible(c) and text_find(c_symbols, c))

// Hexadecimal-digit and slash tests. Every use of the argument is
// parenthesized so compound expressions are classified as a whole; the
// argument is still expanded more than once, so avoid side effects.
#define is_hex_upper(c) ((c)>='A' and (c)<='F')
#define is_hex_lower(c) ((c)>='a' and (c)<='f')
#define is_hex_letter(c) (is_hex_upper(c) or is_hex_lower(c))
#define is_hex(c) (is_number(c) or is_hex_letter(c))

#define SLASH_FORWARD '/'
#define SLASH_BACKWARD '\\'
#define is_slash_f(c) ((c)==SLASH_FORWARD)
#define is_slash_b(c) ((c)==SLASH_BACKWARD)
#define is_slash(c) (is_slash_f(c) or is_slash_b(c))    


Get the next token from source and advance past it. Returns 0 at the end of the input (source=0, *source=0, or a return/end-of-line character) or if an error occurs. On success, returns token_type: T_NAME, T_NUMBER, or T_SYMBOL.
Code:
// copy next token from source. return advanced
// address in source, or 0 if end/!source/*source=0

int get_token() {
  int i=0, c=0, v=0, error=0;
  
  // allocate token if necessary

  if (!setup_token())
    return 0;
  token[0]=0;
  token_type=T_END;

  // source address=0 or end?

  text p=source;
  if (!p or !*p) {
    if (!p)
      source_error("Error (get_token): " \
        "Source address=0");
    return 0;
  }

  // skip all preceding whitespace and comments

  if (parse_type&PARSE_SKIP_ALL) // all + comments
    p=skip_all(p);
  else if (parse_type&PARSE_SKIP_WHITE) // spaces and returns
    p=skip_white(p);
  else if (parse_type&PARSE_SKIP_SPACE) // only spaces before
    p=skip_space(p);

  if (!*p) // end source?
    return 0;

  // is number? decimal, hexadecimal, binary

  c=*p;
  if (is_number(c)) { // number: 0...
    token_type=T_NUMBER;
    if (*p=='0' and // hexadecimal prefix: 0x7F
      (p[1]=='x'
      or p[1]=='X')) {
      for (i=0, p+=2; is_hex(*p)
        and i<16; token[i++]=*p++);
      if (i<16)
        token[i]=0, v=t2h(token);
      else
        error=16;
    }
    else { // number: 123. no prefix
      for (i=0; is_number(*p) and i<10;
        token[i++]=*p++);
      token[i]=0;
      if (*p=='b') { // binary suffix: 1101b
        p++, v=t2b(token);
        error=32;
      }
      else if (*p=='h') { // hexadecimal suffix: 7Fh
        p++, v=t2h(token);
        error=8;
      }
      else { // decimal: 123. no suffix
        v=t2u(token);
        error=10;
      }
      if (i>error) {
        source_error("Error (get_token): " \
          "Value exceeds maximum length (%d)", error);
        return token_type=0;
      }
    }
    i2t(v, token);
    token_value_i=v;
    token_value_type=T_NUMBER;
  }
  else if (is_name(c)) {
    token_type=T_NAME;
    error=NAME_LENGTH;
    for (i=0; is_name_c(*p) and i<error;
      token[i++]=*p++);
    token[i]=0;
    if (i==error) {
      source_error("Error (get_token): " \
        "Name exceeds maximum length (%d)", NAME_LENGTH);
      return token_type=0;
    }
  }
  else if (is_symbol(c)) {
    token_type=T_SYMBOL;
    token[0]=*p++, token[1]=0;
  }
  else {
    source_error("Error (get_token): " \
      "Invalid character: %d/%02xh/'%c'", c, c, c);
    token_type=0;
  }
  source=p;
  return token_type;
}    


Match structure and array. 8 maximum. 1k each.
Code:
// match structures: 8k

#define MATCH_SIZE 1024
#define N_MATCHES 8
#define VALUE_SIZE (MATCH_SIZE-NAME_LENGTH)

object {
  char name[NAME_LENGTH], value[VALUE_SIZE];
} MATCH;

MATCH *matches=0;
int n_matches=0;

int setup_match() {
  if (matches)
    return 1;
  n_matches=0;
  if (!allocate(MATCH *, matches,
    N_MATCHES*sizeof(MATCH))) {
    source_error("Error (setup_match): " \
      "Memory allocation failed");
    return 0;
  }
  return 1;
}    


Match function. Returns 0 if there is no match. On success, returns 1, and matches[] contains the names and values.
Code:
// match(pattern, source)
//
// Compare source against pattern, token by token.
//  - A name in the pattern is a variable: the source token at that
//    position starts a new matches[] entry (name and value).
//  - A symbol in the pattern must occur in the source; source text up to
//    that symbol is appended to the current value.
//  - A pattern token prefixed with ` must equal the source token exactly.
//  - The pattern "any" is true when the source is not empty.
//
// Returns 1 on a match, with matches[0..n_matches-1] filled in. Returns 0
// when there is no match or on error (reported through source_error):
// pattern or source is 0, more than N_MATCHES variables, or a captured
// value that does not fit in VALUE_SIZE bytes.

int match(text pattern, text source) {
  text p=pattern, s=source, m=0, end=0;
  int c, exact, type1, type2;
  char t1[256], t2[256];

  // allocate matches if necessary

  if (!setup_match())
    return 0;

  // initialize matches. m always points into mp->value

  MATCH *mp=&matches[0];
  memory_zero(mp, N_MATCHES*sizeof(MATCH));
  m=mp->value, n_matches=0;

  if (!p or !s) {
    source_error("Error (match): Address=0");
    return 0;
  }

  // skip spaces

  p=skip_space(p), s=skip_space(s);

  // "any" keyword? not empty/nothing?
  // true if source contains something

  // match("any", "x") = true
  // match("any", " ") = false

  if (text_equal(p, "any")) {
    if (*s and not is_return(*s))
      return 1;
    return 0;
  }

  // match(pattern, source)

  if (is_end(*p)) { // end pattern?
    if (is_end(*s)) // end source?
      return 1;     // true, both empty
    return 0;       // false, source has text left
  }

  // scroll through pattern and source. every path through the loop body
  // either returns or continues, so nothing follows the loop

  while (1) {
    p=skip_space(p);  // skip spaces
    s=skip_space(s);

    if (is_end(*s)) { // end source?
      if (is_end(*p)) // end pattern?
        return 1;     // true, finished.
      return 0;       // false, source ends
    }                 // before pattern.
    if (is_end(*p))   // end pattern?
      return 1;       // finished

    exact=0;          // literal `match
    if (*p=='`')      // skip `
      exact=1, p++;

    // get next token from pattern and source: t1, t2.
    // note: get_token returns 0 if end/!*p

    p=get_token_from(t1, p), type1=token_type;
    s=get_token_from(t2, s), type2=token_type;

    if (!s)     // end source?
      return 0; // false
    if (!p)     // end pattern?
      return 1; // finished

    p=skip_space(p), s=skip_space(s);

    // literal `match

    if (exact) {
      if (text_equal(t1, t2)) // true
        continue;
      return 0; // false
    }

    // variable in pattern? get next match name/value

    if (type1==T_NAME) {
      if (n_matches>=N_MATCHES) { // table full
        source_error("Error (match): " \
          "Matches exceed maximum (%d)", N_MATCHES);
        return 0;
      }
      mp=&matches[n_matches++];  // get next match
      text_copy(mp->name, t1);   // name and value (m).
      m=mp->value;               // copy initial value
      m=text_copy(m, t2);        // and advance to end

      if (is_end(*s)) { // end source?
        if (is_end(*p)) // end pattern?
          return 1;     // true, both end
        return 0;       // false, source end
      }
    }

    // symbol in pattern? +-* append source token (t2)
    // to current matches[].value (m), and advance

    else if (type1==T_SYMBOL) {
      c=t1[0];
      if (type2==T_SYMBOL and c==t2[0])
        continue;
      if (is_end(*s)) // end source?
        return 0;     // symbol not found

      // last byte of the current value, reserved for the terminating 0

      end=mp->value+VALUE_SIZE-1;
      if ((int)(text_n(m)+text_n(t2))>(int)(end-m)) {
        source_error("Error (match): " \
          "Value exceeds maximum size (%d)", VALUE_SIZE);
        return 0;
      }

      m=text_attach(m, t2); // attach value

      // copy characters from source to current
      // matches[].value until symbol is encountered.
      // note: may support line continuation with \,
      // or if ends with certain symbols like =,([{
      // which always continues

      while (*s!=c and not is_end(*s)) {
        if (m>=end) { // value is full
          source_error("Error (match): " \
            "Value exceeds maximum size (%d)", VALUE_SIZE);
          return 0;
        }
        *m++=*s++;
      }
      *m=0;
      if (is_end(*s)) // end source?
        return 0;     // false, symbol not found
      s++;            // matched, skip
    }

    if (is_end(*s)) { // end source?
      if (is_end(*p)) // end pattern?
        return 1;     // true, both end
      return 0;       // false, source end
    }

    if (is_end(*p)) {  // end pattern?
      s=skip_space(s); // advance to end
      m+=text_n(m);
      end=mp->value+VALUE_SIZE-1;

      // success; copy remaining characters from
      // source until end *s=0 or return, end of line.
      // attach to matches[].value, then return

      while (not is_end(*s)) {
        if (m>=end) { // value is full
          source_error("Error (match): " \
            "Value exceeds maximum size (%d)", VALUE_SIZE);
          return 0;
        }
        *m++=*s++;
      }
      *m=0;
      return 1; // true
    }
  }
}    


Description: Example (Updated)
Download
Filename: match.zip
Filesize: 12.43 KB
Downloaded: 137 Time(s)

Post 18 Jul 2023, 02:59
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.