# This script creates a new database, loads the XML files dblp.xml,
# sprot.xml, and treebank.xml to database tables, removes the root
# node of the XML, and copies 200 of the resulting subtrees to a new
# table. The database parameters (host,name,user,password) are
# configured in the file "./config.txt".
#
# The progress for each data source is written to its own log file,
# "log/<source>/load.log" (e.g. "log/dblp/load.log").

# Set the Java class path: the current directory plus every jar in lib/.
# The previous value is remembered in oldclasspath so that it can be put
# back at the end of the script.
oldclasspath=${CLASSPATH}
CLASSPATH=.
for jar in lib/*.jar ; do
    # When nothing matches, the glob stays as the literal string
    # "lib/*.jar"; skip it instead of adding a bogus class path entry.
    [ -e "${jar}" ] || continue
    CLASSPATH="${CLASSPATH}:${jar}"
done
# Export the variable so that the java child processes see it even when
# CLASSPATH was not already part of the inherited environment.
export CLASSPATH

# Load the database configuration. The file is expected to define the
# variables host, db, user and pwd that the rest of the script uses.
# The explicit "./" prevents the shell from searching PATH for a file
# called config.txt; "." is the portable spelling of "source".
if ! . ./config.txt
then
    echo "ERROR: could not load ./config.txt" >&2
    # "return" works when this script is sourced, "exit" when it is executed.
    return 1 2>/dev/null || exit 1
fi
    
# Create the database named ${db} on the server ${host}.
printf 'Creating new mysql database "%s" on "%s"...\n' "${db}" "${host}"
# All parameters are quoted so that values containing spaces or glob
# characters are passed to mysql unchanged. The password is attached
# directly to -p (no space) on purpose: that is the form the mysql client
# expects. A failure is reported on stderr, but the script carries on as
# it did before.
mysql -h "${host}" -u "${user}" -p"${pwd}" -e "create database ${db}" ||
    echo "WARNING: mysql could not create database \"${db}\" on \"${host}\"" >&2

# Process each data source in turn: load its XML file into a table, split
# off the root node and copy a random selection of subtrees to a new table.
for source in dblp sprot treebank
do
    # configure variables for the current data source
    file="data/${source}.xml"
    template="data/${source}.template.xml"
    table="${source}"
    logfile="log/${source}/load.log"

    # The output redirections below fail (and the java step is skipped)
    # when the log directory does not exist, so create it if necessary.
    mkdir -p "log/${source}"

    # cmp -s prints nothing and succeeds only if both files are identical,
    # i.e. the data file is still the shipped template.
    if cmp -s "${file}" "${template}"
    then
        # The text is quoted so that "[ENTER]" is printed literally instead
        # of being expanded as a glob pattern against the current directory.
        printf '%s\n' \
            "" \
            "-----------------------------------------------------------------" \
            "NOTE: data/${source}.xml is only a template. You can run the" \
            "experiment with the template, but you will get different results." \
            "Replace the template with the original file to get the same" \
            "results that we show in the paper. More info in README.TXT." \
            "Press [ENTER] to continue." \
            "-----------------------------------------------------------------"
        # wait for the user to confirm; the input itself is not used
        read -r REPLY
    fi

    # load XML file to table (this step starts a fresh log file)
    printf 'Loading data from file "%s" to table "%s"...\n' "${file}" "${table}"
    echo "Log file: $logfile"
    # entityExpansionLimit is passed as a Java system property; the meaning
    # of the "ie" argument is defined by LoadXMLForest and not visible here.
    java -DentityExpansionLimit=20000000 executable.LoadXMLForest \
        "${table}" ie "${file}" > "${logfile}"
    # delete root node of XML; the result goes to table T<table>
    java executable.SplitForest \
        "${table}" 1 "T${table}" >> "${logfile}"
    # choose subtrees for the experiment; 200 is presumably the number of
    # subtrees copied (see the header comment), the remaining arguments are
    # defined by RandomCopySubForest
    java executable.RandomCopySubForest \
        "T${table}" "T${table}Sfr" 200 15 0 >> "${logfile}"
done

# Put back the Java class path that was saved in oldclasspath before this
# script changed it.
CLASSPATH="${oldclasspath}"
