#!/usr/bin/env python3
######################################################################
# DESCRIPTION: Fuzzer dictionary generator
#
# Copyright 2019-2019 by Eric Rippey. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU Lesser
# General Public License Version 3 or the Perl Artistic License Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
######################################################################

# Attempts to pull a list of keywords out of the Flex input
# These are then put in a dictionary of "interesting" sequences
# This will be used to help the fuzzer pick interesting inputs more quickly.

from subprocess import getstatusoutput
from os import system

def take_while(f,a):
    # any(a) => (a->bool)->[a]->[a]
    # Does the same thing as Haskell's takeWhile: collects the leading
    # elements of `a` for which `f` is true and stops at the first element
    # that fails the predicate. Always returns a new list, whatever kind of
    # iterable `a` is.
    kept = []
    for item in a:
        if not f(item):
            break
        kept.append(item)
    return kept

def skip_while(f,a):
    # any(a) => (a->bool)->[a]->[a]
    # The counterpart of take_while (Haskell's dropWhile): drops the leading
    # elements of `a` for which `f` is true and returns everything from the
    # first failing element onwards. The result is a slice of `a`, so a str
    # stays a str and a list stays a list; `a` must support len() and
    # indexing.
    start = 0
    end = len(a)
    # Advance an index and slice once at the end. Re-slicing with a[1:] on
    # every step copies the whole remainder each time, which is quadratic.
    while start < end and f(a[start]):
        start += 1
    # Nothing skipped: hand back the input itself rather than a copy.
    return a[start:] if start else a

def print_lines(a):
    # printable(a) => [a]->void
    # Prints every element of `a` to standard output, one element per line.
    for item in a:
        print(item)

def write_file(filename,contents):
    # str->str->void
    # Creates (or truncates) `filename` and writes `contents` to it in text
    # mode. The `with` block guarantees the handle is flushed and closed
    # before returning, even if the write raises, instead of leaving that
    # to garbage collection.
    with open(filename,'w') as f:
        f.write(contents)

def parse_line(s):
    # str->maybe str
    # Extracts the first double-quoted literal found in `s`.
    # Returns None when `s` contains no double quote at all (including the
    # empty string). Otherwise returns the text between the first quote and
    # the next one, with every backslash removed; if the closing quote is
    # missing, the literal runs to the end of the line.
    # NOTE: backslashes are stripped only after the closing quote has been
    # located, so an escaped quote (\") inside the literal still ends it.
    _, opening, rest = s.partition('"')
    if not opening:
        return None
    literal = rest.partition('"')[0]
    return literal.replace('\\','')

def main():
    # void->void
    # Runs `flex -T` on ../../src/verilog.l, takes the output lines that come
    # before the 'beginning dump of nfa' marker, pulls the quoted literal out
    # of each line, and writes every distinct non-empty literal to its own
    # numbered file under ./dictionary.
    # Raises RuntimeError when flex exits with a non-zero status, and OSError
    # when the output directory or a file cannot be created.
    from os import makedirs

    status,output = getstatusoutput('flex -T ../../src/verilog.l')
    # Explicit check rather than `assert`: asserts are stripped under -O,
    # which would let a failed flex run silently produce a bogus dictionary.
    if status!=0:
        raise RuntimeError('flex exited with status %d:\n%s' % (status,output))

    lines = take_while(lambda x: 'beginning dump of nfa' not in x,
                       output.splitlines())
    # filter(None, ...) drops both None (no literal on the line) and ''.
    # Sorting makes the token -> file number mapping the same on every run;
    # plain set iteration order varies with string hash randomisation.
    tokens = sorted(set(filter(None,map(parse_line,lines))))

    dirname = 'dictionary'
    # Same effect as `mkdir -p`, without spawning a shell.
    makedirs(dirname,exist_ok=True)
    for i,token in enumerate(tokens):
        write_file(dirname+'/'+str(i),token)

# Generate the dictionary only when executed as a script, not on import.
if __name__ == '__main__':
    main()
