##------------------------------
##
##
## Sarchitect Designer 2.3 script
## implementing "Classification Flow-C"
##
##
##
## Shaillay Kumar Dogra
## editor@qsarworld.com
## August 07, 2007
##
##------------------------------


import script
from script.dataset import *
from script.algorithm import *
from script.project import *
from script.view import *
from script.omega import createComponent, showDialog
from javax.swing import *
from com.strandgenomics.cube.dataset import *
import jarray
from math import *


##-------------------------- 
## GET LIST OF CONTINUOUS, UNMARKED COLUMNS
def getcolumnlist(dataset):
    """Return the indices of the continuous, unmarked columns of `dataset`.

    The continuous column indices are obtained from
    DatasetUtil.getContinuousColumnIndices and then passed through
    script.project.removeMarkedColumns; the result of that call is returned
    unchanged.
    """
    continuous = DatasetUtil.getContinuousColumnIndices(dataset)
    return script.project.removeMarkedColumns(dataset, continuous)

##----------------------------------
##
def getIndexedIntArray(rowHeaderLabels,dataset):
    """Build an indexed int array of the dataset positions of the named columns.

    rowHeaderLabels -- object exposing getSize() and get(i); each entry is
                       used as a column name of `dataset`.
    dataset         -- dataset queried with getColumn(name) and indexOf(column).

    Returns the value of ArrayUtil.createIndexedIntArray applied to the
    collected indices, in the same order as the labels.
    """
    # Imported locally, as in the original, so the Java package is only
    # required when this helper is actually called.
    from com.strandgenomics.cube.framework.data import ArrayUtil, DefaultIntArray
    size = rowHeaderLabels.getSize()
    # NOTE(review): the constructor argument is presumably an initial
    # capacity, since values are appended with add() below -- confirm.
    indices = DefaultIntArray(size)
    for i in range(size):
        column = dataset.getColumn(rowHeaderLabels.get(i))
        indices.add(dataset.indexOf(column))
    return ArrayUtil.createIndexedIntArray(indices)

##----------------------------------
##
def getStringArray(rowHeaderLabels):
    """Copy the entries of `rowHeaderLabels` into a new Python list.

    rowHeaderLabels -- object exposing getSize() and get(i).

    Returns a list holding get(0) .. get(getSize() - 1), in order; an empty
    list when getSize() is 0.
    """
    return [rowHeaderLabels.get(i) for i in range(rowHeaderLabels.getSize())]

##----------------------------------


## STEP 1: rank descriptors with Kruskal-Wallis, show the ranking in a
## "Feature Ranking" folder node, then run SelectFeatures with the same
## settings.
##
## Both names below are re-assigned further down (dataset from the algorithm
## inputs, node from a second getActiveDatasetNode() call) before being used.
node = script.project.getActiveDatasetNode()
dataset = script.project.getActiveDataset()

#
## This is what gets called from menu: Model->Select Model Descriptors->Kruskal Wallis
## Not using it in script as it requires user-intervention for input
#from script.chem.models.StatisticalCorrelation import createStatCorrUI
#createStatCorrUI()
#


## Kruskal-Wallis Correlation against endpoint to select top N descriptors
## (rank=100 with select="Based on rank")
result = script.algorithm.kwallisFeatureSelection(test="kwallis",select="Based on rank", rank=100).execute()

## Read back the inputs the algorithm ran with and its outputs.
inputs = result.getInputs()
dataset = inputs["dataset"]
rankDataset = result["results"]
rowHeaderLabels = result["rowHeaderLabels"]

## Build the dataset to display: a string column named "Descriptor" holding
## the row header labels, followed by every column of the ranking result.
nameColumn = ColumnFactory.createStringColumn("Descriptor", getStringArray(rowHeaderLabels))
columnList = []
columnList.append(nameColumn)
for i in range(rankDataset.getColumnCount()):
    columnList.append(rankDataset.getColumn(i))

## jarray.array converts the Python list into a Java IColumn[] array.
columns = jarray.array(columnList, IColumn)
newDataset = DatasetFactory.createDataset(rankDataset.getName(),columns)

node = script.project.getActiveDatasetNode()

from com.strandgenomics.cube.framework.selection import MappedSelectionModel, DummySelectionModel
from com.strandgenomics.cube.framework.filter import DummyFilterModel
## The row selection model of the view wraps the active node's column
## selection model together with the dataset indices of the ranked
## descriptors -- presumably so that selecting a row in the ranking view
## selects the matching descriptor column; confirm against MappedSelectionModel.
selModel = MappedSelectionModel(node.getContext().getColumnSelectionModel(), getIndexedIntArray(rowHeaderLabels, dataset))

## Show the ranking under a new "Feature Ranking" folder below the active node.
newnode = script.project.addFolderNode("Feature Ranking", node)
script.view.RankFeaturesView(node=newnode, dataset=newDataset, title=newDataset.getName(),  rowSelectionModel=selModel, columnSelectionModel = DummySelectionModel.INSTANCE, filterModel = DummyFilterModel(newDataset.getRowCount())).show()


## Run SelectFeatures with the same pvalue / rank / select settings the
## ranking algorithm reported in its inputs.
pvalue = inputs['pvalue']
rank = inputs['rank']
select = inputs['select']
script.algorithm.SelectFeatures(node = node, featureselection = newDataset, dataset = dataset, pvalue = pvalue, rank = rank, select = select).execute(displayResult=1)



## STEP 2: validate a Naive Bayes model on the currently active dataset.
##
## The active node/dataset are read again here -- presumably the
## SelectFeatures step above left the reduced dataset active; confirm.
node = script.project.getActiveDatasetNode()
dataset = script.project.getActiveDataset()
## Keep a handle on this node; it is re-activated after the validation run.
thisnode = node

collist = getcolumnlist(dataset)
## Endpoint = first column marked "classlabel".
## NOTE(review): ArrayUtil is only imported explicitly inside
## getIndexedIntArray; at this level it has to come from one of the star
## imports at the top of the file -- confirm.
endpoint = DatasetUtil.getMarkedColumnIndices(dataset, "classlabel")
endpoint = ArrayUtil.createIndexedIntArray(endpoint)
endpoint = endpoint.get(0)

## Call Naive Bayes validation: N-fold with nFold=3, numRepeats=10
algo = script.algorithm.NaiveBayesValidationX(classLabelColumn=endpoint,columnIndices=collist,
validationType="NFold",nFold=3,numRepeats=10)

## Non-interactive run; results are displayed.
algo.execute(interactive=0, displayResult=1, newThread=0,lockProject=0)


## STEP 3: forward selection of descriptors, scored by Naive Bayes validation.
##
## Re-activate the node saved before the validation run, then recompute the
## column list and the endpoint from the dataset that is active afterwards.
thisnode.setActive(1)
dataset = script.project.getActiveDataset()
collist = getcolumnlist(dataset)
## Endpoint = first column marked "classlabel".
endpoint = DatasetUtil.getMarkedColumnIndices(dataset, "classlabel")
endpoint = ArrayUtil.createIndexedIntArray(endpoint)
endpoint = endpoint.get(0)

#
## This is what gets called from menu: Model->Select Model Descriptors->Forward/Backward Selection
## Not using it in script as it requires user-intervention for input
#script.spring.featureselection.evaluator.runFSWiz()
#

## Value passed to ForwardSelection as targetSize.
target_size=10

## Fitness is "Overall Accuracy" measured as "Validation Accuracy", using a
## Naive Bayes validation algorithm configured like the one in the previous
## step (NFold, nFold=3, numRepeats=10) as the evaluator.
fsAlgo = script.algorithm.ForwardSelection(dataset=dataset, columnIndices=collist, classLabelColumn= endpoint, targetSize=target_size, fitnessDef="Overall Accuracy", targetAccuracy=100, accuracyType="Validation Accuracy", fitnessEvaluationAlgorithm=script.algorithm.NaiveBayesValidationX(classLabelColumn=endpoint,columnIndices=collist,validationType="NFold",nFold=3,numRepeats=10))

## Non-interactive run; results are displayed.
result = fsAlgo.execute(interactive=0, displayResult=1, newThread=0)


##
## END
##