I use BNFC to generate a parser: `bnfc -m -c ./mylang.cf`. Internally, the BNFC makefile calls bison to generate the C parser.
# Regenerate the C parser source from the bison grammar.
Parser.c : mylang.y
	${BISON} ${BISON_OPTS} mylang.y -o Parser.c
I can successfully parse source code by calling the generated `psProc` function below.
/* Global variables holding parse results for entrypoints. */
/* Starts out as 0; psProc() returns this value when yyparse() reports success. */
Proc YY_RESULT_Proc_ = 0;
// ...
/*
 * Entrypoint: parse a Proc from an in-memory string.
 *
 * str: the source text handed to the lexer through mylang__scan_string().
 *
 * Returns the value of the global YY_RESULT_Proc_ when yyparse() reports
 * success, or 0 when it reports failure.
 */
Proc psProc(const char *str)
{
  YY_BUFFER_STATE scan_buffer;
  int parse_status;

  mylang__init_lexer(0);
  scan_buffer = mylang__scan_string(str);
  parse_status = yyparse();

  /* The scan buffer is released before the result is inspected. */
  mylang__delete_buffer(scan_buffer);

  /* A non-zero status from yyparse() means the parse failed. */
  return (parse_status != 0) ? 0 : YY_RESULT_Proc_;
}
/* Forward declaration; nodes are handled through the pointer typedef Proc. */
struct Proc_;
typedef struct Proc_ *Proc;

/*
 * One Proc node of the abstract syntax tree.
 *
 * NOTE(review): `kind` presumably tells which member of `u` is in use
 * (is_PAdd <-> u.padd_, ...); is_PNil has no corresponding member.
 * Confirm against the generated constructor functions.
 */
struct Proc_
{
  enum
  {
    is_PGround, is_PCollect, is_PVar, is_PVarRef, is_PNil, is_PSimpleType,
    is_PNegation, is_PConjunction, is_PDisjunction,
    is_PEval, is_PMethod, is_PExprs,
    is_PNot, is_PNeg,
    is_PMult, is_PDiv, is_PMod, is_PPercentPercent,
    is_PAdd, is_PMinus, is_PPlusPlus, is_PMinusMinus,
    is_PLt, is_PLte, is_PGt, is_PGte,
    is_PMatches, is_PEq, is_PNeq,
    is_PAnd, is_POr,
    is_PSend, is_PContr, is_PInput, is_PChoice, is_PMatch, is_PBundle,
    is_PIf, is_PIfElse, is_PNew, is_PPar
  } kind;

  union
  {
    struct { Ground ground_; } pground_;
    struct { Collection collection_; } pcollect_;
    struct { ProcVar procvar_; } pvar_;
    struct { Var var_; VarRefKind varrefkind_; } pvarref_;
    struct { SimpleType simpletype_; } psimpletype_;
    struct { Proc proc_; } pnegation_;
    struct { Proc proc_1; Proc proc_2; } pconjunction_;
    struct { Proc proc_1; Proc proc_2; } pdisjunction_;
    struct { Name name_; } peval_;
    struct { ListProc listproc_; Proc proc_; Var var_; } pmethod_;
    struct { Proc proc_; } pexprs_;
    struct { Proc proc_; } pnot_;
    struct { Proc proc_; } pneg_;
    struct { Proc proc_1; Proc proc_2; } pmult_;
    struct { Proc proc_1; Proc proc_2; } pdiv_;
    struct { Proc proc_1; Proc proc_2; } pmod_;
    struct { Proc proc_1; Proc proc_2; } ppercentpercent_;
    struct { Proc proc_1; Proc proc_2; } padd_;
    struct { Proc proc_1; Proc proc_2; } pminus_;
    struct { Proc proc_1; Proc proc_2; } pplusplus_;
    struct { Proc proc_1; Proc proc_2; } pminusminus_;
    struct { Proc proc_1; Proc proc_2; } plt_;
    struct { Proc proc_1; Proc proc_2; } plte_;
    struct { Proc proc_1; Proc proc_2; } pgt_;
    struct { Proc proc_1; Proc proc_2; } pgte_;
    struct { Proc proc_1; Proc proc_2; } pmatches_;
    struct { Proc proc_1; Proc proc_2; } peq_;
    struct { Proc proc_1; Proc proc_2; } pneq_;
    struct { Proc proc_1; Proc proc_2; } pand_;
    struct { Proc proc_1; Proc proc_2; } por_;
    struct { ListProc listproc_; Name name_; Send send_; } psend_;
    struct { ListName listname_; Name name_; NameRemainder nameremainder_; Proc proc_; } pcontr_;
    struct { Proc proc_; Receipt receipt_; } pinput_;
    struct { ListBranch listbranch_; } pchoice_;
    struct { ListCase listcase_; Proc proc_; } pmatch_;
    struct { Bundle bundle_; Proc proc_; } pbundle_;
    struct { Proc proc_1; Proc proc_2; } pif_;
    struct { Proc proc_1; Proc proc_2; Proc proc_3; } pifelse_;
    struct { ListNameDecl listnamedecl_; Proc proc_; } pnew_;
    struct { Proc proc_1; Proc proc_2; } ppar_;
  } u;
};
I have a few questions regarding `Proc psProc(const char *str)`.
Can I free the source buffer referred to by the `char *str` parameter immediately after `psProc` returns? I guess the returned `Proc` may contain pointers referring to the input source buffer, so I should ensure that the lifetime of the source buffer is longer than that of the returned pointer. Is that correct?
How should I free the returned `Proc`? The returned `Proc` is a pointer to `Proc_`, and it makes up an abstract syntax tree linked by pointers. I only need to call `free()` once on the returned pointer to free it, right?
Within the body of `Proc psProc(const char *str)`, it returns a pointer stored in the global variable `YY_RESULT_Proc_`. Does that mean I must not call `psProc` concurrently from different threads?
These sorts of questions should be answered in the documentation for the tool, but I couldn't find them there :-( I don't have a lot of experience with BNFC either, so be cautious applying this answer.
`psProc` calls the lexer's `scan_string` interface, and that interface makes a copy of the provided string. Flex likes to modify the input as it tokenises, so it can only deal with `const char*` inputs by copying them. So the string could have been freed immediately after the call to `scan_string`, but `psProc` parses the entire input before returning, so you don't have any way to do that. You can certainly free the string when `psProc` returns.
I doubt whether this is an issue for you, but if you are planning on parsing very large in-memory strings, you might want to consider using `fmemopen` (at least, on Posix platforms) to open the string as a `FILE*`. That doesn't avoid the copy, but it does it in chunks of 8k or so, which avoids keeping two copies of the entire string during the parse.
I have no idea how BNFC expects you to free parse tree nodes. (In fact, I rather suspect that it doesn't expect you to do that.) The nodes are linked with internal pointers, and it would certainly be possible to write an AST walker which freed all the nodes recursively using a post-order traversal. But I don't see any generated code which does that. Perhaps I haven't looked hard enough.
Calling `free()` on the top-level node will just free one node. The rest of the tree will then be leaked, since no other pointers to it exist.
I'm pretty sure your suspicions about thread safety are correct. The global is assigned to by the reduction action for the `Proc` production, and then later returned by `psProc`. There's no locking, so if there were another parser in another thread, the global could get overwritten. The global is just a pointer to the node which is to be returned, and the node itself should be thread-safe, since it is dynamically allocated by the parser thread. So you could probably change the declaration of the global(s) to use thread-local storage, but that would have to be done by postprocessing the generated code.