Pisu nejaky disassembler pro ne uplne bezny RISCovy procesor. Potrebuju nejak rozumne v cecku udelat to, abych mohl porovnavat masky instrukci (vyznamove bity jsou ruzne rozesety) a soucasne jsem mohl extrahovat jednotlive bity do promennych. Tohle se pise hrozne blbe v cistem C, tak jsem zacal psat program, ktery rozepise kazde volani fiktivni fce bitmatch(var,bitmask[,assignments]) tak, aby vratila nonzero kdyz dojde k matchi instrukce a vyextrahovala mi bity do promennych. Takhle zustava kod celkem slusne citelny a je to syntakticky podobne cecku, takze jdou pouzit indentory, syntax highlighting, apod. Zde je priklad:
if (bitmatch
(opcode, '01010a100aabab.' /*an opcode */ , rd /*target reg */ =
b, rs = a))
{
printf ("ok");
}
vystup:
if (/*.................................................01010a100aabab.*/ ((opcode & 0x7DC0) == 0x2900) && (rd = ((opcode>>1)&0x1)<<0|((opcode>>3)&0x1)<<1,rs = ((opcode>>2)&0x1)<<0|((opcode>>4)&0x3)<<1|((opcode>>9)&0x1)<<3,1))
{
printf ("ok");
}
(neni to jeste dodelane, tohle je fiktivni instrukce, a je pozde a nejsem si jist jestli tam nemam nejakou botu)
Z jineho projektu mam napsany C lexikalni analyzator (tokenizer), ktery jsem na to pouzil, jeho funkce je primitivni, najde dalsi token a naplni strukturu s informacemi o tokenu. Udelal jsem tedy primitivni preprocesor, ktery z meho pseudo-c generuje cecko (lokalizuje identifikator bitmatch a slepe prepise do C kodu):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ctok.h"
/*
 * Report a syntax error found in the input and terminate the program.
 *
 * tok          token at which the error was detected (currently unused; kept
 *              so that callers do not have to change once position
 *              information is reported)
 * whatiswrong  human-readable description of the problem
 *
 * Does not return: the process exits with status 1.
 */
void
synterr (struct ctok *tok, const char *whatiswrong)
{
  (void) tok;
  /* stdout carries the generated C code, so diagnostics must go to stderr
     or they would end up inside the output file.  */
  fprintf (stderr, "Syntax error: %s\n", whatiswrong);
  exit (1);
}
/*
 * Entry point of the bitmatch preprocessor.
 *
 * Reads the pseudo-C input named by argv[1], copies it to stdout and replaces
 * every bitmatch (var, 'mask' [, name = letter]...) pseudo-call with a plain
 * C expression of the form
 *
 *   (((var & MASK) == VALUE) && (name = <extracted bits>, ..., 1))
 *
 * In the mask, '0' and '1' are bits that must match; every other character
 * is ignored by the comparison.  A one-letter identifier on the right-hand
 * side of an assignment collects all mask positions carrying that letter,
 * lowest bit first.  Spaces in the mask are ignored.
 *
 * Returns 0 on success, 1 if writing to stdout failed; exits with status 1
 * on usage, read or syntax errors.  argv[2] is accepted but not used yet,
 * the output always goes to stdout.
 */
int
main (int argc, char **argv)
{
  struct ctok tok;
  int pos, lastwpos;
  int i, j;
  int bs, be, bp;
  char varname[0x100], valname[0x100], bitmask[64];
  unsigned long long bmmask, bmval, fieldmask;

  if (argc != 3)
    {
      /* NOTE: outfile is not used yet.  */
      fprintf (stderr, "Usage: %s [infile.d.c] [outfile.c]\n", argv[0]);
      exit (1);
    }
  if (ctok_readfile (argv[1]) <= 0)
    {
      perror (argv[1]);
      exit (1);
    }
  fprintf (stdout, "/*Automatically generated by %s, DO NOT EDIT! */\n\n",
           argv[0]);
  lastwpos = pos = 0;
  while (ctok_gettoken (pos, &tok, CTOK_FLAG_IGNORECOMMENTS) > CTOK_EOF)
    {
      /* Search for identifier "bitmatch".  */
      if (tok.toktype == CTOK_IDENT && tok.len == 8
          && !memcmp (tok.data, "bitmatch", tok.len))
        {
          /* Flush the untouched input preceding the pseudo-call.  */
          if (lastwpos < pos)
            {
              fwrite (ctok_getdata (lastwpos), pos - lastwpos, 1, stdout);
            }

          /* '(' -- either a wrong token type or a wrong symbol is an
             error, hence "||".  */
          if (ctok_gettoken
              (ctok_nexttokpos (&tok), &tok,
               CTOK_FLAG_IGNORECOMMENTS) != CTOK_SYMBOL || tok.info != '(')
            synterr (&tok, "Expected (");

          /* Name of the variable holding the opcode.  */
          if (ctok_gettoken
              (ctok_nexttokpos (&tok), &tok,
               CTOK_FLAG_IGNORECOMMENTS) != CTOK_IDENT)
            synterr (&tok, "Expected variable identifier");
          if ((size_t) tok.len >= sizeof varname)
            synterr (&tok, "Variable identifier too long");
          memcpy (varname, tok.data, tok.len);
          varname[tok.len] = 0;

          if (ctok_gettoken
              (ctok_nexttokpos (&tok), &tok,
               CTOK_FLAG_IGNORECOMMENTS) != CTOK_SYMBOL || tok.info != ',')
            synterr (&tok, "Expected \',\' after variable identifier");

          /* The bit mask literal.  */
          if (ctok_gettoken
              (ctok_nexttokpos (&tok), &tok,
               CTOK_FLAG_IGNORECOMMENTS) != CTOK_CHARLIT)
            synterr (&tok, "Expected bitmask");
          /* Ignore spaces in bits, these are just for better readability.
             Reject masks that would not fit into the 64-bit buffer.  */
          for (i = 0, j = 0; i < tok.len; i++)
            {
              if (tok.data[i] == ' ')
                continue;
              if (j >= (int) sizeof bitmask)
                synterr (&tok, "Bitmask is longer than 64 bits");
              bitmask[j++] = tok.data[i];
            }
          /* Right-align the mask and pad the high bits with don't-cares.  */
          memmove (bitmask + 64 - j, bitmask, j);
          memset (bitmask, '.', 64 - j);

          /* bitmask[63 - i] describes bit i of the opcode.  */
          bmval = bmmask = 0;
          for (i = 0; i < 64; i++)
            if (bitmask[63 - i] == '0' || bitmask[63 - i] == '1')
              {
                bmmask |= 1ULL << i;
                if (bitmask[63 - i] == '1')
                  bmval |= 1ULL << i;
              }
          /* The whole replacement is wrapped in parentheses so that it
             behaves like a single primary expression (e.g. under '!').  */
          printf ("(/*%64.64s*/ ((%s & 0x%llX%s) == 0x%llX%s) && (", bitmask,
                  varname, bmmask, (bmmask >> 32) != 0 ? "ULL" : "", bmval,
                  (bmval >> 32) != 0 ? "ULL" : "");

          /* Optional list of ", name = letter" bit assignments.  */
          while (1)
            {
              if (ctok_gettoken
                  (ctok_nexttokpos (&tok), &tok,
                   CTOK_FLAG_IGNORECOMMENTS) != CTOK_SYMBOL
                  || (tok.info != ')' && tok.info != ','))
                synterr (&tok, "Expected , or )");
              if (tok.info == ')')
                break;

              if (ctok_gettoken
                  (ctok_nexttokpos (&tok), &tok,
                   CTOK_FLAG_IGNORECOMMENTS) != CTOK_IDENT)
                synterr (&tok, "Expected variable identifier");
              if ((size_t) tok.len >= sizeof valname)
                synterr (&tok, "Variable identifier too long");
              memcpy (valname, tok.data, tok.len);
              valname[tok.len] = 0;

              if (ctok_gettoken
                  (ctok_nexttokpos (&tok), &tok,
                   CTOK_FLAG_IGNORECOMMENTS) != CTOK_SYMBOL
                  || tok.info != '=')
                synterr (&tok, "Expected = for bits assignment");
              if (ctok_gettoken
                  (ctok_nexttokpos (&tok), &tok,
                   CTOK_FLAG_IGNORECOMMENTS) != CTOK_IDENT || tok.len != 1)
                synterr (&tok, "Expected bit letter/identifier");

              printf ("%s = ", valname);
              /* Calculate bit extraction equations: every contiguous run of
                 the letter [bs, be) becomes one shift-and-mask term that is
                 placed at bit position bp of the target variable.  */
              for (bp = 0, i = 0; i < 64; i++)
                if (bitmask[64 - 1 - i] == tok.data[0])
                  {
                    bs = i;
                    while (i < 64 && bitmask[64 - 1 - i] == tok.data[0])
                      i++;
                    be = i;
                    if (bp != 0)
                      printf ("|");
                    /* Build the run mask in 64 bits; shifting a plain int
                       by 31 or more bits would be undefined behaviour.  */
                    fieldmask = (be - bs) >= 64
                      ? ~0ULL : (1ULL << (be - bs)) - 1;
                    printf ("((%s>>%d)&0x%llX%s)<<%d", varname, bs,
                            fieldmask, (fieldmask >> 32) != 0 ? "ULL" : "",
                            bp);
                    bp += be - bs;
                  }
              if (bp == 0)
                {
                  fprintf (stderr, "No matching bits for variable %s\n",
                           valname);
                  exit (1);
                }
              printf (",");
            }                   /* while new stuff for bit assignment */
          printf ("1))");
          lastwpos = ctok_nexttokpos (&tok);
        }                       /* if "bitmatch" identifier token */
      pos = ctok_nexttokpos (&tok);
    }
  if (tok.toktype != CTOK_EOF)
    synterr (&tok, "C parser error");
  /* Copy the rest of the input that follows the last pseudo-call.  */
  if (lastwpos < pos)
    {
      fwrite (ctok_getdata (lastwpos), pos - lastwpos, 1, stdout);
    }
  /* A truncated output file must not be mistaken for a valid one.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    {
      perror ("stdout");
      return 1;
    }
  return 0;
}
Cilove prekladace jsou gcc, llvm, msvc.
Ma to 2 problemy:
- idealne bych potreboval nejak poslat vystup do prekladace bez generovani mezisouboru. Videl jsem nejake snahy o vlastni preprocesor (https://stackoverflow.com/questions/3545875/custom-gcc-preprocessor), ale takhle to nechci, navic msvc (Windows) bude problem. Idealni by byl nejaky parametr typu '-custom-preprocessor-before-cpp1=...'. Nenasel jsem. Nebo to tam dostat pres pipe. Nenasel jsem jak.
- potrebuju zachovat cisla radku (pocitejme s tim, ze nahrazovana "volani" bitmatch budou i viceradkova, s komentari atd.), aby slo hledat chyby, kdyz se nekde v kodu seknu (nutne, aby to chodilo alespon v gcc/llvm, msvc by bylo fajn). Predpokladam, ze by to mela nejak resit direktiva #line, ale zatim se mi to nedari. Jak na to?
Mimo tyto dotazy by mne jeste zajimalo, jestli nejake moderni jazyky poskytuji knihovny, ktere tohle umoznuji napsat s kratsim kodem. C tokenizer vyuziva jen libc a ma tyto pocty radku: 740 ctok.c / 177 ctok.h; jsem takova konzerva, treba mi nekdo ukaze, ze s "modernim programovanim" to jde udelat jednoduseji.