{
"title": "Random Forest Mastery: 100 MCQs",
"description": "A comprehensive set of multiple-choice questions designed to test and deepen your understanding of Random Forest, covering fundamentals, parameters, ensemble concepts, and practical applications.",
"questions": [
{
"id": 1,
"questionText": "What is Random Forest primarily used for?",
"options": [
"Only Clustering",
"Only Time Series",
"Only Image Processing",
"Classification and Regression"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is a versatile ensemble method used for both classification and regression tasks."
},
{
"id": 2,
"questionText": "Random Forest is an example of which type of learning?",
"options": [
"Supervised Learning",
"Unsupervised Learning",
"Self-Supervised Learning",
"Reinforcement Learning"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest is trained using labeled data, so it is supervised learning."
},
{
"id": 3,
"questionText": "What is the base algorithm used inside a Random Forest?",
"options": [
"Linear Regression",
"K-Means",
"Decision Trees",
"Neural Networks"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest builds multiple Decision Trees and combines them."
},
{
"id": 4,
"questionText": "Why is it called 'Random' Forest?",
"options": [
"Because it gives random answers",
"Because trees are random shapes",
"Because it uses randomness in data and features",
"Because it is used randomly"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest randomly selects data samples and features to build diverse trees."
},
{
"id": 5,
"questionText": "What does Random Forest reduce compared to a single Decision Tree?",
"options": [
"Accuracy",
"Computation Time",
"Overfitting",
"Data Size"
],
"correctAnswerIndex": 2,
"explanation": "By combining many trees, Random Forest reduces overfitting."
},
{
"id": 6,
"questionText": "What technique does Random Forest use to train different trees?",
"options": [
"Gradient Descent",
"Bootstrap Sampling",
"Dropout",
"Pooling"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bootstrap sampling (bagging) to create different training subsets."
},
{
"id": 7,
"questionText": "Random Forest is an example of which ensemble method?",
"options": [
"Boosting",
"Stacking",
"Bagging",
"Reinforcement"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is a bagging-based ensemble learning method."
},
{
"id": 8,
"questionText": "Which metric is commonly used to measure feature importance in Random Forest?",
"options": [
"Euclidean Distance",
"Entropy Loss",
"Gini Importance",
"Cosine Similarity"
],
"correctAnswerIndex": 2,
"explanation": "Gini Impurity is used to decide splits, and feature importance is derived from it."
},
{
"id": 9,
"questionText": "What does each individual tree in a Random Forest output during classification?",
"options": [
"A regression score only",
"A class prediction",
"A probability distribution",
"A clustering label"
],
"correctAnswerIndex": 1,
"explanation": "Each tree predicts a class, and Random Forest takes the majority vote."
},
{
"id": 10,
"questionText": "How does Random Forest make the final prediction in classification?",
"options": [
"Majority voting",
"Max pooling",
"Averaging",
"Sorting"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest predicts the class with the highest number of votes from trees."
},
{
"id": 11,
"questionText": "What happens if we increase the number of trees in Random Forest?",
"options": [
"Accuracy usually improves",
"Model becomes unstable",
"Accuracy always decreases",
"It deletes trees randomly"
],
"correctAnswerIndex": 0,
"explanation": "More trees reduce variance and improve accuracy until a saturation point."
},
{
"id": 12,
"questionText": "What kind of data can Random Forest handle?",
"options": [
"Only numerical",
"Only text data",
"Both categorical and numerical",
"Only time series"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest works well with mixed data types."
},
{
"id": 13,
"questionText": "Random Forest is robust to which problem?",
"options": [
"Large memory usage",
"Outliers",
"Class imbalance",
"Overfitting"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest reduces overfitting by combining multiple trees."
},
{
"id": 14,
"questionText": "What is the default criterion for splitting nodes in Random Forest classification?",
"options": [
"MAE",
"Gini Impurity",
"MSE",
"Cosine Distance"
],
"correctAnswerIndex": 1,
"explanation": "Gini impurity is the default split criterion for classification."
},
{
"id": 15,
"questionText": "How does Random Forest handle missing values?",
"options": [
"It ignores all rows",
"It can handle them fairly well",
"It crashes immediately",
"It replaces them with zeros"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest can handle missing values better than many algorithms."
},
{
"id": 16,
"questionText": "What is the advantage of Random Forest over a single Decision Tree?",
"options": [
"No training required",
"Always 100% accuracy",
"Higher accuracy",
"Less training time"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is more accurate than a single Decision Tree due to ensemble voting."
},
{
"id": 17,
"questionText": "What type of sampling is used in Random Forest?",
"options": [
"Sequential sampling",
"Sampling with replacement",
"K-fold only",
"Sampling without replacement"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bootstrap sampling, which is sampling with replacement."
},
{
"id": 18,
"questionText": "What does each tree in Random Forest learn from?",
"options": [
"Only 50% of all features",
"Only one class of data",
"A random subset of data",
"The entire dataset"
],
"correctAnswerIndex": 2,
"explanation": "Each tree is trained on different bootstrapped samples."
},
{
"id": 19,
"questionText": "What happens if the number of trees is too small?",
"options": [
"Model becomes overconfident",
"It increases memory usage too much",
"It always overfits",
"Model may underfit"
],
"correctAnswerIndex": 3,
"explanation": "Too few trees may result in underfitting and poor accuracy."
},
{
"id": 20,
"questionText": "Random Forest reduces variance by?",
"options": [
"Adding dropout",
"Averaging multiple trees",
"Increasing learning rate",
"Minimizing entropy"
],
"correctAnswerIndex": 1,
"explanation": "Averaging predictions reduces variance and improves generalization."
},
{
"id": 21,
"questionText": "What is the method used to combine predictions in Random Forest?",
"options": [
"Majority voting",
"Stacking",
"Gradient descent",
"Concatenation"
],
"correctAnswerIndex": 0,
"explanation": "Classification is done using majority vote."
},
{
"id": 22,
"questionText": "What happens during training if two trees see different features?",
"options": [
"They predict randomly",
"They become identical",
"They learn different patterns",
"They crash"
],
"correctAnswerIndex": 2,
"explanation": "Feature randomness ensures diverse learning across trees."
},
{
"id": 23,
"questionText": "Is Random Forest sensitive to feature scaling?",
"options": [
"Yes",
"Only for categorical features",
"Only for small datasets",
"No"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest does not require normalization or scaling."
},
{
"id": 24,
"questionText": "Random Forest internally uses how many Decision Trees?",
"options": [
"Based on dataset size",
"User-defined number",
"Exactly 10",
"Always 1"
],
"correctAnswerIndex": 1,
"explanation": "The number of trees is set by the user using the 'n_estimators' parameter."
},
{
"id": 25,
"questionText": "Random Forest works well when the dataset is?",
"options": [
"Only with time series",
"Large with many features",
"Only with text data",
"Very small only"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest performs well with high-dimensional and large datasets."
},
{
"id": 26,
"questionText": "What is the output of Random Forest for binary classification?",
"options": [
"Probability only",
"Only 1",
"Only 0",
"0 or 1"
],
"correctAnswerIndex": 3,
"explanation": "The final output is a class label like 0 or 1."
},
{
"id": 27,
"questionText": "What is 'n_estimators' in Random Forest?",
"options": [
"Number of features",
"Number of layers",
"Number of epochs",
"Number of trees"
],
"correctAnswerIndex": 3,
"explanation": "'n_estimators' defines how many Decision Trees to train."
},
{
"id": 28,
"questionText": "What happens if all trees in Random Forest agree?",
"options": [
"Model crashes",
"Accuracy drops",
"High confidence in prediction",
"It becomes regression"
],
"correctAnswerIndex": 2,
"explanation": "More agreement among trees increases prediction confidence."
},
{
"id": 29,
"questionText": "Which parameter controls the depth of trees in Random Forest?",
"options": [
"n_estimators",
"learning_rate",
"max_depth",
"n_clusters"
],
"correctAnswerIndex": 2,
"explanation": "max_depth controls how deep each tree can grow."
},
{
"id": 30,
"questionText": "What is a potential drawback of Random Forest?",
"options": [
"Cannot classify data",
"Needs feature scaling",
"High memory usage",
"Always underfits"
],
"correctAnswerIndex": 2,
"explanation": "Training many trees can consume large memory and computation."
},
{
"id": 31,
"questionText": "What is the main reason Random Forest performs well compared to a single tree?",
"options": [
"It removes features randomly",
"It increases bias intentionally",
"It uses deep neural layers",
"It averages multiple trees to reduce variance"
],
"correctAnswerIndex": 3,
"explanation": "Averaging multiple independent trees stabilizes the predictions and lowers overfitting."
},
{
"id": 32,
"questionText": "What does the term 'out-of-bag' (OOB) error mean in Random Forest?",
"options": [
"Training error on all data",
"Error on random subsets",
"Loss on test set only",
"Error on unseen samples not used in training trees"
],
"correctAnswerIndex": 3,
"explanation": "OOB error estimates model performance using samples not included in the bootstrap subset."
},
{
"id": 33,
"questionText": "How does Random Forest ensure diversity among trees?",
"options": [
"By pruning all trees equally",
"Using same random seed",
"Random sampling of data and features",
"Training all trees on same data"
],
"correctAnswerIndex": 2,
"explanation": "Bootstrapping and random feature selection introduce variation between trees."
},
{
"id": 34,
"questionText": "Which of the following parameters controls the number of features considered for splitting?",
"options": [
"min_samples_split",
"max_features",
"n_estimators",
"max_depth"
],
"correctAnswerIndex": 1,
"explanation": "max_features limits how many features are chosen at each split, encouraging diversity."
},
{
"id": 35,
"questionText": "What happens if 'max_features' is set to 1 in a Random Forest?",
"options": [
"Each tree becomes highly decorrelated",
"All trees are identical",
"Model becomes identical to a single tree",
"Training stops early"
],
"correctAnswerIndex": 0,
"explanation": "When only one feature is chosen at each split, trees are very different, improving ensemble strength."
},
{
"id": 36,
"questionText": "Which evaluation metric is best for imbalanced classification using Random Forest?",
"options": [
"Accuracy",
"F1-score",
"MSE",
"R²"
],
"correctAnswerIndex": 1,
"explanation": "F1-score balances precision and recall, making it ideal for imbalanced datasets."
},
{
"id": 37,
"questionText": "Random Forest handles overfitting better than a single decision tree mainly due to?",
"options": [
"Ensemble averaging",
"Deep pruning",
"More bias",
"Gradient descent"
],
"correctAnswerIndex": 0,
"explanation": "Averaging the outputs of multiple uncorrelated trees reduces overfitting."
},
{
"id": 38,
"questionText": "What is the typical relationship between bias and variance in Random Forest?",
"options": [
"High bias, low variance",
"Low bias, high variance",
"High bias, high variance",
"Low bias, low variance"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest balances both bias and variance well due to its ensemble structure."
},
{
"id": 39,
"questionText": "In Random Forest, which trees are used to predict a test sample?",
"options": [
"Random subset of trees",
"Only first tree",
"All trees in the ensemble",
"Last tree only"
],
"correctAnswerIndex": 2,
"explanation": "Each tree contributes to prediction, and results are aggregated by majority voting."
},
{
"id": 40,
"questionText": "What is the purpose of 'random_state' in Random Forest?",
"options": [
"Increasing randomness",
"Feature selection",
"Reproducibility",
"Performance improvement"
],
"correctAnswerIndex": 2,
"explanation": "random_state ensures the same random sampling for consistent results."
},
{
"id": 41,
"questionText": "What is the role of 'min_samples_split' in Random Forest?",
"options": [
"Number of bootstrap samples",
"Total number of features used",
"Maximum leaf nodes allowed",
"Minimum number of samples required to split an internal node"
],
"correctAnswerIndex": 3,
"explanation": "It prevents splits when a node has too few samples, reducing overfitting."
},
{
"id": 42,
"questionText": "What is feature importance in Random Forest?",
"options": [
"A pruning factor",
"A clustering metric",
"A measure of data imbalance",
"A score showing how useful a feature is for prediction"
],
"correctAnswerIndex": 3,
"explanation": "Feature importance reflects how much each feature reduces impurity in trees."
},
{
"id": 43,
"questionText": "What technique is used by Random Forest to combine multiple tree outputs?",
"options": [
"Stacking",
"Boosting",
"Bagging",
"Dropout"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is based on bagging — bootstrap aggregation of decision trees."
},
{
"id": 44,
"questionText": "If Random Forest has too many trees, what is the likely result?",
"options": [
"Accuracy decreases",
"Overfitting increases",
"Computation cost increases",
"Model becomes unstable"
],
"correctAnswerIndex": 2,
"explanation": "After a certain number, adding trees only increases computation without much gain."
},
{
"id": 45,
"questionText": "Which parameter limits how deep a tree can grow?",
"options": [
"n_estimators",
"max_depth",
"criterion",
"max_features"
],
"correctAnswerIndex": 1,
"explanation": "max_depth sets the maximum depth, controlling model complexity."
},
{
"id": 46,
"questionText": "What is the main drawback of Random Forest in large datasets?",
"options": [
"Low accuracy",
"High computational cost",
"High bias",
"No randomness"
],
"correctAnswerIndex": 1,
"explanation": "Training hundreds of trees can be time-consuming for large datasets."
},
{
"id": 47,
"questionText": "Which of these can Random Forest NOT handle directly?",
"options": [
"Categorical data",
"Sequential time dependencies",
"Missing values",
"Large datasets"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest doesn’t model time dependencies, so it's not ideal for time series."
},
{
"id": 48,
"questionText": "How is randomness introduced in Random Forest?",
"options": [
"Bootstrap sampling and random feature selection",
"Gradient descent",
"Batch normalization",
"Learning rate scheduling"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest introduces randomness both in data and feature sampling."
},
{
"id": 49,
"questionText": "What type of ensemble method is Random Forest?",
"options": [
"Voting",
"Bagging",
"Boosting",
"Stacking"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bagging (bootstrap aggregation) to train multiple trees."
},
{
"id": 50,
"questionText": "What is the relationship between Decision Tree depth and overfitting?",
"options": [
"Deeper trees tend to overfit",
"Deeper trees always underfit",
"Depth has no effect",
"Shallow trees always overfit"
],
"correctAnswerIndex": 0,
"explanation": "Large tree depth can cause the model to memorize training data patterns."
},
{
"id": 51,
"questionText": "What happens to the Random Forest model if trees are too shallow?",
"options": [
"Model overfits",
"Training time increases",
"Variance increases",
"Model underfits"
],
"correctAnswerIndex": 3,
"explanation": "Shallow trees can't capture complex data patterns."
},
{
"id": 52,
"questionText": "Why does Random Forest not require feature scaling?",
"options": [
"It normalizes automatically",
"It splits based on thresholds, not distance",
"It uses Euclidean distance",
"It drops correlated features"
],
"correctAnswerIndex": 1,
"explanation": "Tree-based methods are invariant to feature scaling."
},
{
"id": 53,
"questionText": "What happens if all trees are trained on identical bootstrap samples?",
"options": [
"Higher accuracy",
"No effect",
"Reduced diversity",
"Faster training"
],
"correctAnswerIndex": 2,
"explanation": "Lack of randomness among trees reduces ensemble benefit."
},
{
"id": 54,
"questionText": "Which statement is TRUE about Random Forest?",
"options": [
"It removes all bias",
"It reduces bias but increases variance",
"It increases both bias and variance",
"It reduces variance but keeps bias low"
],
"correctAnswerIndex": 3,
"explanation": "Bagging in Random Forest reduces variance without significantly increasing bias."
},
{
"id": 55,
"questionText": "In Random Forest, what does 'bootstrap=True' mean?",
"options": [
"No randomness is applied",
"Each tree skips feature selection",
"All trees use the full dataset",
"Each tree is trained on a random sample with replacement"
],
"correctAnswerIndex": 3,
"explanation": "Bootstrap sampling ensures each tree sees a slightly different dataset."
},
{
"id": 56,
"questionText": "How is feature importance calculated in Random Forest?",
"options": [
"Based on learning rate",
"Using feature frequency",
"By gradient descent",
"Based on impurity reduction"
],
"correctAnswerIndex": 3,
"explanation": "It measures how much each feature decreases node impurity across all trees."
},
{
"id": 57,
"questionText": "What is a typical hyperparameter tuning technique for Random Forest?",
"options": [
"Grid Search or Random Search",
"K-means",
"Dropout",
"Gradient Descent"
],
"correctAnswerIndex": 0,
"explanation": "Both Grid and Random Search are popular for hyperparameter tuning."
},
{
"id": 58,
"questionText": "What happens if we set 'n_estimators' too high?",
"options": [
"Lower accuracy",
"Longer training time",
"Underfitting",
"Loss of randomness"
],
"correctAnswerIndex": 1,
"explanation": "Too many trees make training slow, though accuracy improvement becomes marginal."
},
{
"id": 59,
"questionText": "How is Random Forest resistant to overfitting?",
"options": [
"Using deeper trees",
"Gradient correction",
"Averaging independent trees",
"Removing bias"
],
"correctAnswerIndex": 2,
"explanation": "Averaging many independent models cancels out noise and variance."
},
{
"id": 60,
"questionText": "Which of the following best describes the Random Forest algorithm?",
"options": [
"A single large decision tree",
"Linear regression with trees",
"Stacked boosting method",
"Ensemble of decision trees trained on random subsets"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is an ensemble approach using bagging and random feature selection."
},
{
"id": 61,
"questionText": "What is the main reason Random Forest works well even with noisy data?",
"options": [
"It applies dropout regularization",
"It removes noise automatically",
"It memorizes noise across all trees",
"It averages multiple trees to smooth out noise"
],
"correctAnswerIndex": 3,
"explanation": "Averaging predictions of multiple trees reduces the impact of noise in data."
},
{
"id": 62,
"questionText": "Which technique helps Random Forest estimate generalization error without a validation set?",
"options": [
"Cross-validation only",
"Early stopping",
"Out-of-Bag (OOB) estimation",
"Dropout sampling"
],
"correctAnswerIndex": 2,
"explanation": "OOB samples are not seen during training, allowing internal error estimation."
},
{
"id": 63,
"questionText": "What is the effect of increasing 'min_samples_split' too much?",
"options": [
"Model may underfit",
"Model may overfit",
"Training crashes",
"Bias becomes zero"
],
"correctAnswerIndex": 0,
"explanation": "Larger 'min_samples_split' prevents deeper splits, reducing learning capacity."
},
{
"id": 64,
"questionText": "What is the typical output of Random Forest in binary classification?",
"options": [
"Always continuous output",
"Softmax score",
"Only probability",
"Majority class from all trees"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest uses majority voting to decide final class."
},
{
"id": 65,
"questionText": "In Random Forest, what happens if we disable bootstrap sampling?",
"options": [
"All trees become identical",
"Each tree will see full dataset",
"Training becomes impossible",
"Feature importance cannot be calculated"
],
"correctAnswerIndex": 1,
"explanation": "bootstrap=False means no sampling, trees are trained on complete dataset."
},
{
"id": 66,
"questionText": "Which Random Forest parameter controls how many features a single split considers?",
"options": [
"min_samples_split",
"max_depth",
"max_features",
"n_estimators"
],
"correctAnswerIndex": 2,
"explanation": "Randomly selecting only 'max_features' at each split ensures diversity."
},
{
"id": 67,
"questionText": "Which situation is most ideal for using Random Forest?",
"options": [
"Low-dimensional time series",
"Fully labeled image datasets only",
"Continuous text data",
"High-dimensional structured tabular data"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is excellent for large structured numeric + categorical datasets."
},
{
"id": 68,
"questionText": "How does Random Forest improve generalization?",
"options": [
"By memorizing data patterns",
"By deep pruning all trees",
"By increasing bias",
"By reducing variance using averaging"
],
"correctAnswerIndex": 3,
"explanation": "Averaging predictions from many uncorrelated trees reduces variance."
},
{
"id": 69,
"questionText": "What is a scenario where Random Forest might perform poorly?",
"options": [
"Large tabular dataset",
"Handling missing values",
"Highly sequential time-based data",
"Text classification with manual encoding"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is not designed to understand sequential temporal dependencies."
},
{
"id": 70,
"questionText": "What is the advantage of using 'max_samples' parameter in Random Forest?",
"options": [
"It forces normalization",
"It increases tree depth",
"It controls how many samples each tree sees",
"It controls feature count"
],
"correctAnswerIndex": 2,
"explanation": "max_samples limits data per tree to improve speed and variability."
},
{
"id": 71,
"questionText": "Why is Random Forest called a 'bagging' technique?",
"options": [
"It merges deep networks",
"It sequentially boosts errors",
"It uses bootstrap sampling + aggregation",
"It stacks models layer by layer"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is based on Bagging = Bootstrap + Aggregation."
},
{
"id": 72,
"questionText": "What is the role of 'n_jobs' parameter in Random Forest?",
"options": [
"Controls parallel processing",
"Controls noise injection",
"Controls memory allocation",
"Controls feature removal"
],
"correctAnswerIndex": 0,
"explanation": "n_jobs defines how many CPU cores to use in training."
},
{
"id": 73,
"questionText": "What happens if trees in a Random Forest are highly correlated?",
"options": [
"Bias becomes zero",
"Performance decreases",
"No effect",
"Accuracy increases massively"
],
"correctAnswerIndex": 1,
"explanation": "Less diversity among trees means less benefit from ensemble averaging."
},
{
"id": 74,
"questionText": "Why is Random Forest naturally resistant to overfitting?",
"options": [
"Because it always uses shallow trees",
"Because it restricts learning",
"Because it averages predictions from multiple trees",
"Because it limits depth"
],
"correctAnswerIndex": 2,
"explanation": "Averaging predictions reduces variance and overfitting."
},
{
"id": 75,
"questionText": "What is the output of feature importance scores in Random Forest?",
"options": [
"Relative importance values per feature",
"Loss graph",
"Class probability distribution",
"Confusion matrix"
],
"correctAnswerIndex": 0,
"explanation": "Feature importance shows which features contribute most to splits."
},
{
"id": 76,
"questionText": "Which of these indicates Random Forest overfitting?",
"options": [
"High training accuracy, low test accuracy",
"Slow training time only",
"Equal train and test accuracy",
"Low training accuracy, high test accuracy"
],
"correctAnswerIndex": 0,
"explanation": "Overfitting means model fits training well but generalizes poorly."
},
{
"id": 77,
"questionText": "What is a good reason to increase 'min_samples_leaf'?",
"options": [
"To reduce bias",
"To force normalization",
"To reduce overfitting",
"To increase overfitting"
],
"correctAnswerIndex": 2,
"explanation": "Larger leaves generalize better by preventing overly specific splits."
},
{
"id": 78,
"questionText": "Which Random Forest parameter can reduce model size and computation?",
"options": [
"max_depth",
"All of the above",
"n_estimators",
"max_samples"
],
"correctAnswerIndex": 1,
"explanation": "Reducing number of trees, depth, or samples lowers computational load."
},
{
"id": 79,
"questionText": "Which part of Random Forest helps most against overfitting?",
"options": [
"Gradient correction",
"Feature normalization",
"Deep trees",
"Bagging"
],
"correctAnswerIndex": 3,
"explanation": "Bagging reduces variance by training trees independently on random subsets."
},
{
"id": 80,
"questionText": "What is the disadvantage of using very small 'max_depth' in Random Forest?",
"options": [
"Unbalanced samples",
"Memory leak",
"Overfitting",
"Underfitting"
],
"correctAnswerIndex": 3,
"explanation": "Very shallow trees cannot capture complex relationships."
},
{
"id": 81,
"questionText": "How does Random Forest handle feature correlation?",
"options": [
"It removes correlated features by default",
"It may give correlated features lower importance",
"It fails completely",
"It merges correlated features"
],
"correctAnswerIndex": 1,
"explanation": "If two features are correlated, importance may be split between them."
},
{
"id": 82,
"questionText": "What is 'Gini Importance' in Random Forest?",
"options": [
"Metric to find best cluster",
"Loss function for optimization",
"Error on OOB samples",
"Measure of how much a feature reduces impurity"
],
"correctAnswerIndex": 3,
"explanation": "It quantifies impurity reduction contributed by each feature."
},
{
"id": 83,
"questionText": "Why is Random Forest not ideal for time-series forecasting?",
"options": [
"It needs scaling",
"It ignores temporal order",
"It can't process numbers",
"It overfits too much"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest treats data as independent samples, ignoring sequence dependence."
},
{
"id": 84,
"questionText": "What is a sign that 'n_estimators' should be increased?",
"options": [
"Very fast training",
"Perfect accuracy",
"High test variance",
"Low training accuracy only"
],
"correctAnswerIndex": 2,
"explanation": "Increasing trees reduces prediction variance and stabilizes model."
},
{
"id": 85,
"questionText": "What is 'entropy' used for in Random Forest?",
"options": [
"Learning rate control",
"Feature normalization",
"Pruning strategy",
"Split quality measure"
],
"correctAnswerIndex": 3,
"explanation": "Entropy and Gini are purity measures used to decide best splits."
},
{
"id": 86,
"questionText": "Which scenario may require reducing 'max_depth'?",
"options": [
"When training time is extremely short",
"When features are few",
"When training accuracy is perfect but test accuracy is low",
"When both accuracies are low"
],
"correctAnswerIndex": 2,
"explanation": "This indicates overfitting — reducing depth increases generalization."
},
{
"id": 87,
"questionText": "What is one major strength of Random Forest?",
"options": [
"Perfect for text generation",
"Robust to noise and overfitting",
"Predicts time trends",
"Always fastest model"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest is sturdy against noisy data due to ensemble averaging."
},
{
"id": 88,
"questionText": "Increasing 'min_samples_leaf' will most likely:",
"options": [
"Make model generalize better",
"Decrease bias heavily",
"Increase training variance",
"Increase memorization"
],
"correctAnswerIndex": 0,
"explanation": "Larger leaves lead to simpler splits and better generalization."
},
{
"id": 89,
"questionText": "Which metric is best for class imbalance evaluation in Random Forest?",
"options": [
"MSE",
"Recall / F1-score",
"Accuracy only",
"R-squared"
],
"correctAnswerIndex": 1,
"explanation": "F1 handles imbalanced data better by balancing precision and recall."
},
{
"id": 90,
"questionText": "What happens if 'max_features' is too high?",
"options": [
"Lower training accuracy",
"Trees become more random",
"Trees become more similar",
"OOB error becomes undefined"
],
"correctAnswerIndex": 2,
"explanation": "More features → less randomness → higher correlation between trees."
},
{
"id": 91,
"questionText": "Which combination may indicate optimal Random Forest tuning?",
"options": [
"Low accuracy on both",
"High train accuracy, high test accuracy",
"Low train accuracy, high test accuracy",
"High train accuracy, low test accuracy"
],
"correctAnswerIndex": 1,
"explanation": "This indicates low bias and low variance — a well-generalized model."
},
{
"id": 92,
"questionText": "Why doesn’t Random Forest require much hyperparameter tuning compared to other models?",
"options": [
"It ignores input data",
"It is robust to overfitting and variance",
"It always needs deep tuning",
"It cannot be tuned"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest naturally reduces variance and overfitting, making it less sensitive to hyperparameters."
},
{
"id": 93,
"questionText": "What is the effect of increasing 'n_estimators' on OOB error?",
"options": [
"OOB error is unaffected",
"OOB error fluctuates randomly",
"OOB error usually decreases and stabilizes",
"OOB error increases"
],
"correctAnswerIndex": 2,
"explanation": "More trees provide a better estimate of error and reduce variance of predictions."
},
{
"id": 94,
"questionText": "Which is true about correlated features in Random Forest?",
"options": [
"Correlation is ignored completely",
"Random Forest fails with correlation",
"Correlated features are removed automatically",
"Importance may be split among correlated features"
],
"correctAnswerIndex": 3,
"explanation": "When features are correlated, importance scores may be shared, lowering individual scores."
},
{
"id": 95,
"questionText": "Random Forest is considered a black-box model because?",
"options": [
"It outputs linear coefficients",
"It uses shallow trees only",
"It is hard to interpret individual predictions",
"It has only one tree"
],
"correctAnswerIndex": 2,
"explanation": "The ensemble of many trees makes it difficult to trace exact reasoning for predictions."
},
{
"id": 96,
"questionText": "Which is a good approach to reduce Random Forest computation on very large datasets?",
"options": [
"Remove bagging",
"Use all features",
"Increase depth",
"Reduce 'n_estimators' or use 'max_samples'"
],
"correctAnswerIndex": 3,
"explanation": "Fewer trees or smaller bootstrap samples lower computational cost."
},
{
"id": 97,
"questionText": "Why is Random Forest more stable than a single Decision Tree?",
"options": [
"Because it uses scaling",
"Because it prunes all trees heavily",
"Because it has only one tree",
"Because predictions are averaged over many trees"
],
"correctAnswerIndex": 3,
"explanation": "Averaging reduces sensitivity to noise and variance in data."
},
{
"id": 98,
"questionText": "What kind of bias-variance tradeoff does Random Forest achieve?",
"options": [
"High bias, low variance",
"Low bias, low variance",
"High bias, high variance",
"Low bias, high variance"
],
"correctAnswerIndex": 1,
"explanation": "Bagging ensures variance reduction while keeping bias relatively low."
},
{
"id": 99,
"questionText": "Which Random Forest feature allows quick insight into feature relevance?",
"options": [
"Feature importance scores",
"OOB error",
"min_samples_split",
"max_depth"
],
"correctAnswerIndex": 0,
"explanation": "These scores help identify which features are most influential in predictions."
},
{
"id": 100,
"questionText": "In Random Forest classification, which method aggregates the outputs of all trees?",
"options": [
"Gradient boosting",
"Weighted averaging",
"Softmax",
"Majority voting"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest takes the class predicted by the majority of trees as the final output."
}
]
}