Unfortunately, this feature is not available in the older version of Spark MLlib 1.5.1.
However, you can find it in the recent Pipeline API in Spark MLlib 2.x as RandomForestClassifier:
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.mllib.util.MLUtils
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF
val labelIndexer = new StringIndexer()
.setInputCol("label")
.setOutputCol("indexedLabel").fit(data)
val featureIndexer = new VectorIndexer()
.setInputCol("features")
.setOutputCol("indexedFeatures")
.setMaxCategories(4).fit(data)
val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
val rf = new RandomForestClassifier()
.setLabelCol(labelIndexer.getOutputCol)
.setFeaturesCol(featureIndexer.getOutputCol)
.setNumTrees(10)
val labelConverter = new IndexToString()
.setInputCol("prediction")
.setOutputCol("predictedLabel")
.setLabels(labelIndexer.labels)
val pipeline = new Pipeline()
.setStages(Array(labelIndexer, featureIndexer, rf, labelConverter))
val model = pipeline.fit(trainingData)
val predictions = model.transform(testData)
predictions.show(10)
: Spark MLlib ML - .
:
predictionCol .rawPredictionCol # node, ( ).probabilityCol # , rawPrediction, ( ).