#!/usr/bin/env Rscript
# Copyright (c) 2011-2012, Universite de Versailles St-Quentin-en-Yvelines
#
# This file is part of ASK.  ASK is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.



# Load gbm with library() so that a missing package aborts the script right
# here; require() would only return FALSE and the failure would surface later
# as an unrelated "could not find function" error.
suppressPackageStartupMessages(library(gbm, quietly = TRUE))

# Expected command-line arguments (see the usage message below):
#   1: configuration file -- not read directly in this script; presumably
#      consumed by the sourced configuration.R (TODO confirm)
#   2: labelled input table, read with read.table()
#   3: output file
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 3) {
    stop("Usage: gbm_build <configuration.conf> <labelled_file> <output_file>")
}

# The shared configuration helpers are located relative to $ASKHOME. Fail with
# an explicit message when it is unset instead of silently trying to source
# "/common/configuration.R".
ask_home <- Sys.getenv("ASKHOME")
if (!nzchar(ask_home)) {
    stop("ASKHOME environment variable is not set")
}
# NOTE(review): configuration.R presumably defines conf(), which the rest of
# the script relies on -- it is not defined in this file.
source(file.path(ask_home, "common/configuration.R"))
input <- read.table(args[2])


# Convert the columns of `input` that correspond to categorical factors.
# The k-th entry of conf("factors") describes the k-th column of `input`;
# entries whose type is "categorical" have their column turned into an R
# factor, all other columns are left untouched.
factor_specs <- conf("factors")
for (idx in seq_along(factor_specs)) {
  spec <- factor_specs[[idx]]
  if (spec$type == "categorical") {
    input[, idx] <- as.factor(input[, idx])
  }
}

# Per-observation weights for the fit: read them from "hierarchical.weights"
# in the configured output directory when that file exists; otherwise give
# every row of `input` a weight of 1.
weights_file <- paste(conf("output_directory"),
                      "hierarchical.weights",
                      sep = "/")
weights <- if (!file.exists(weights_file)) {
    rep(1, nrow(input))
} else {
    print("GBM will use weights")
    scan(weights_file)
}


# Fit the GBM model: the last column of `input` is the response and every
# other column is a predictor.
#
# The response is referenced by its bare column name rather than as
# `input$V<n>`. With `data = input`, the `.` on the right-hand side expands to
# every column of the data that is not otherwise named in the formula, and
# `input$V<n>` does not count as naming column `V<n>` -- so the response would
# also end up among the predictors. Building the formula from the column name
# avoids that and removes the need for eval(parse(...)).
response_name <- names(input)[ncol(input)]
expr <- reformulate(".", response = as.name(response_name))

# Hyper-parameters are looked up in the configuration; the second argument to
# conf() is presumably the fallback used when the key is absent (TODO confirm
# against configuration.R).
gbm1 <- gbm(expr,
            data = input,
            weights = weights,
            n.trees = conf("modules.model.params.ntrees", 3000),
            interaction.depth = conf("modules.model.params.interactiondepth", 8),
            shrinkage = conf("modules.model.params.shrinkage", 0.01),
            distribution = conf("modules.model.params.distribution", "gaussian"),
            n.minobsinnode = conf("modules.model.params.minobsinnode", 3),
            verbose = conf("modules.model.params.verbose", FALSE))

# Estimate the number of boosting iterations to use from the out-of-bag
# performance, without drawing the diagnostic plot.
best.iter <- gbm.perf(gbm1, plot.it = FALSE, method = "OOB")

# Persist the model and the selected iteration count under their original
# object names so that consumers of the file can load() them unchanged.
save(gbm1, best.iter, file = args[3])

