{ "title": "Random Forest Mastery: 100 MCQs", "description": "A comprehensive set of multiple-choice questions designed to test and deepen your understanding of Random Forest, covering fundamentals, parameters, ensemble concepts, and practical applications.", "questions": [ { "id": 1, "questionText": "What is Random Forest primarily used for?", "options": [ "Only Clustering", "Only Time Series", "Only Image Processing", "Classification and Regression" ], "correctAnswerIndex": 3, "explanation": "Random Forest is a versatile ensemble method used for both classification and regression tasks." }, { "id": 2, "questionText": "Random Forest is an example of which type of learning?", "options": [ "Supervised Learning", "Unsupervised Learning", "Self-Supervised Learning", "Reinforcement Learning" ], "correctAnswerIndex": 0, "explanation": "Random Forest is trained using labeled data, so it is supervised learning." }, { "id": 3, "questionText": "What is the base algorithm used inside a Random Forest?", "options": [ "Linear Regression", "K-Means", "Decision Trees", "Neural Networks" ], "correctAnswerIndex": 2, "explanation": "Random Forest builds multiple Decision Trees and combines them." }, { "id": 4, "questionText": "Why is it called 'Random' Forest?", "options": [ "Because it gives random answers", "Because trees are random shapes", "Because it uses randomness in data and features", "Because it is used randomly" ], "correctAnswerIndex": 2, "explanation": "Random Forest randomly selects data samples and features to build diverse trees." }, { "id": 5, "questionText": "What does Random Forest reduce compared to a single Decision Tree?", "options": [ "Accuracy", "Computation Time", "Overfitting", "Data Size" ], "correctAnswerIndex": 2, "explanation": "By combining many trees, Random Forest reduces overfitting." 
}, { "id": 6, "questionText": "What technique does Random Forest use to train different trees?", "options": [ "Gradient Descent", "Bootstrap Sampling", "Dropout", "Pooling" ], "correctAnswerIndex": 1, "explanation": "Random Forest uses bootstrap sampling (bagging) to create different training subsets." }, { "id": 7, "questionText": "Random Forest is an example of which ensemble method?", "options": [ "Boosting", "Stacking", "Bagging", "Reinforcement" ], "correctAnswerIndex": 2, "explanation": "Random Forest is a bagging-based ensemble learning method." }, { "id": 8, "questionText": "Which metric is commonly used to measure feature importance in Random Forest?", "options": [ "Euclidean Distance", "Entropy Loss", "Gini Importance", "Cosine Similarity" ], "correctAnswerIndex": 2, "explanation": "Gini Impurity is used to decide splits, and feature importance is derived from it." }, { "id": 9, "questionText": "What does each individual tree in a Random Forest output during classification?", "options": [ "A regression score only", "A class prediction", "A probability distribution", "A clustering label" ], "correctAnswerIndex": 1, "explanation": "Each tree predicts a class, and Random Forest takes the majority vote." }, { "id": 10, "questionText": "How does Random Forest make the final prediction in classification?", "options": [ "Majority voting", "Max pooling", "Averaging", "Sorting" ], "correctAnswerIndex": 0, "explanation": "Random Forest predicts the class with the highest number of votes from trees." }, { "id": 11, "questionText": "What happens if we increase the number of trees in Random Forest?", "options": [ "Accuracy usually improves", "Model becomes unstable", "Accuracy always decreases", "It deletes trees randomly" ], "correctAnswerIndex": 0, "explanation": "More trees reduce variance and improve accuracy until a saturation point." 
}, { "id": 12, "questionText": "What kind of data can Random Forest handle?", "options": [ "Only numerical", "Only text data", "Both categorical and numerical", "Only time series" ], "correctAnswerIndex": 2, "explanation": "Random Forest works well with mixed data types." }, { "id": 13, "questionText": "Random Forest is robust to which problem?", "options": [ "Large memory usage", "Outliers", "Class imbalance", "Overfitting" ], "correctAnswerIndex": 3, "explanation": "Random Forest reduces overfitting by combining multiple trees." }, { "id": 14, "questionText": "What is the default criterion for splitting nodes in Random Forest classification?", "options": [ "MAE", "Gini Impurity", "MSE", "Cosine Distance" ], "correctAnswerIndex": 1, "explanation": "Gini impurity is the default split criterion for classification." }, { "id": 15, "questionText": "How does Random Forest handle missing values?", "options": [ "It ignores all rows", "It can handle them fairly well", "It crashes immediately", "It replaces them with zeros" ], "correctAnswerIndex": 1, "explanation": "Random Forest can handle missing values better than many algorithms." }, { "id": 16, "questionText": "What is the advantage of Random Forest over a single Decision Tree?", "options": [ "No training required", "Always 100% accuracy", "Higher accuracy", "Less training time" ], "correctAnswerIndex": 2, "explanation": "Random Forest is more accurate than a single Decision Tree due to ensemble voting." }, { "id": 17, "questionText": "What type of sampling is used in Random Forest?", "options": [ "Sequential sampling", "Sampling with replacement", "K-fold only", "Sampling without replacement" ], "correctAnswerIndex": 1, "explanation": "Random Forest uses bootstrap sampling, which is sampling with replacement." 
}, { "id": 18, "questionText": "What does each tree in Random Forest learn from?", "options": [ "Only 50% of all features", "Only one class of data", "A random subset of data", "The entire dataset" ], "correctAnswerIndex": 2, "explanation": "Each tree is trained on different bootstrapped samples." }, { "id": 19, "questionText": "What happens if the number of trees is too small?", "options": [ "Model becomes overconfident", "It increases memory usage too much", "It always overfits", "Predictions may be unstable and less accurate" ], "correctAnswerIndex": 3, "explanation": "With too few trees, the averaged prediction is still noisy (high variance), so results are unstable and accuracy suffers." }, { "id": 20, "questionText": "Random Forest reduces variance by?", "options": [ "Adding dropout", "Averaging multiple trees", "Increasing learning rate", "Minimizing entropy" ], "correctAnswerIndex": 1, "explanation": "Averaging predictions reduces variance and improves generalization." }, { "id": 21, "questionText": "What is the method used to combine predictions in Random Forest?", "options": [ "Majority voting", "Stacking", "Gradient descent", "Concatenation" ], "correctAnswerIndex": 0, "explanation": "Classification is done using majority vote." }, { "id": 22, "questionText": "What happens during training if two trees see different features?", "options": [ "They predict randomly", "They become identical", "They learn different patterns", "They crash" ], "correctAnswerIndex": 2, "explanation": "Feature randomness ensures diverse learning across trees." }, { "id": 23, "questionText": "Is Random Forest sensitive to feature scaling?", "options": [ "Yes", "Only for categorical features", "Only for small datasets", "No" ], "correctAnswerIndex": 3, "explanation": "Random Forest does not require normalization or scaling." 
}, { "id": 24, "questionText": "Random Forest internally uses how many Decision Trees?", "options": [ "Based on dataset size", "User-defined number", "Exactly 10", "Always 1" ], "correctAnswerIndex": 1, "explanation": "The number of trees is set by the user using the 'n_estimators' parameter." }, { "id": 25, "questionText": "Random Forest works well when the dataset is?", "options": [ "Only with time series", "Large with many features", "Only with text data", "Very small only" ], "correctAnswerIndex": 1, "explanation": "Random Forest performs well with high-dimensional and large datasets." }, { "id": 26, "questionText": "What is the output of Random Forest for binary classification?", "options": [ "Probability only", "Only 1", "Only 0", "0 or 1" ], "correctAnswerIndex": 3, "explanation": "The final output is a class label like 0 or 1." }, { "id": 27, "questionText": "What is 'n_estimators' in Random Forest?", "options": [ "Number of features", "Number of layers", "Number of epochs", "Number of trees" ], "correctAnswerIndex": 3, "explanation": "'n_estimators' defines how many Decision Trees to train." }, { "id": 28, "questionText": "What happens if all trees in Random Forest agree?", "options": [ "Model crashes", "Accuracy drops", "High confidence in prediction", "It becomes regression" ], "correctAnswerIndex": 2, "explanation": "More agreement among trees increases prediction confidence." }, { "id": 29, "questionText": "Which parameter controls the depth of trees in Random Forest?", "options": [ "n_estimators", "learning_rate", "max_depth", "n_clusters" ], "correctAnswerIndex": 2, "explanation": "max_depth controls how deep each tree can grow." }, { "id": 30, "questionText": "What is a potential drawback of Random Forest?", "options": [ "Cannot classify data", "Needs feature scaling", "High memory usage", "Always underfits" ], "correctAnswerIndex": 2, "explanation": "Training many trees can consume large memory and computation." 
}, { "id": 31, "questionText": "What is the main reason Random Forest performs well compared to a single tree?", "options": [ "It removes features randomly", "It increases bias intentionally", "It uses deep neural layers", "It averages multiple trees to reduce variance" ], "correctAnswerIndex": 3, "explanation": "Averaging multiple independent trees stabilizes the predictions and lowers overfitting." }, { "id": 32, "questionText": "What does the term 'out-of-bag' (OOB) error mean in Random Forest?", "options": [ "Training error on all data", "Error on random subsets", "Loss on test set only", "Error on unseen samples not used in training trees" ], "correctAnswerIndex": 3, "explanation": "OOB error estimates model performance using samples not included in the bootstrap subset." }, { "id": 33, "questionText": "How does Random Forest ensure diversity among trees?", "options": [ "By pruning all trees equally", "Using same random seed", "Random sampling of data and features", "Training all trees on same data" ], "correctAnswerIndex": 2, "explanation": "Bootstrapping and random feature selection introduce variation between trees." }, { "id": 34, "questionText": "Which of the following parameters controls the number of features considered for splitting?", "options": [ "min_samples_split", "max_features", "n_estimators", "max_depth" ], "correctAnswerIndex": 1, "explanation": "max_features limits how many features are chosen at each split, encouraging diversity." }, { "id": 35, "questionText": "What happens if 'max_features' is set to 1 in a Random Forest?", "options": [ "Each tree becomes highly decorrelated", "All trees are identical", "Model becomes identical to a single tree", "Training stops early" ], "correctAnswerIndex": 0, "explanation": "When only one randomly chosen feature is considered at each split, the trees differ greatly from one another (low correlation), although each individual tree becomes weaker." 
}, { "id": 36, "questionText": "Which evaluation metric is best for imbalanced classification using Random Forest?", "options": [ "Accuracy", "F1-score", "MSE", "R²" ], "correctAnswerIndex": 1, "explanation": "F1-score balances precision and recall, making it ideal for imbalanced datasets." }, { "id": 37, "questionText": "Random Forest handles overfitting better than a single decision tree mainly due to?", "options": [ "Ensemble averaging", "Deep pruning", "More bias", "Gradient descent" ], "correctAnswerIndex": 0, "explanation": "Averaging the outputs of multiple uncorrelated trees reduces overfitting." }, { "id": 38, "questionText": "What is the typical relationship between bias and variance in Random Forest?", "options": [ "High bias, low variance", "Low bias, high variance", "High bias, high variance", "Low bias, low variance" ], "correctAnswerIndex": 3, "explanation": "Random Forest balances both bias and variance well due to its ensemble structure." }, { "id": 39, "questionText": "In Random Forest, which trees are used to predict a test sample?", "options": [ "Random subset of trees", "Only first tree", "All trees in the ensemble", "Last tree only" ], "correctAnswerIndex": 2, "explanation": "Each tree contributes to prediction, and results are aggregated by majority voting." }, { "id": 40, "questionText": "What is the purpose of 'random_state' in Random Forest?", "options": [ "Increasing randomness", "Feature selection", "Reproducibility", "Performance improvement" ], "correctAnswerIndex": 2, "explanation": "random_state ensures the same random sampling for consistent results." }, { "id": 41, "questionText": "What is the role of 'min_samples_split' in Random Forest?", "options": [ "Number of bootstrap samples", "Total number of features used", "Maximum leaf nodes allowed", "Minimum number of samples required to split an internal node" ], "correctAnswerIndex": 3, "explanation": "It prevents splits when a node has too few samples, reducing overfitting." 
}, { "id": 42, "questionText": "What is feature importance in Random Forest?", "options": [ "A pruning factor", "A clustering metric", "A measure of data imbalance", "A score showing how useful a feature is for prediction" ], "correctAnswerIndex": 3, "explanation": "Feature importance reflects how much each feature reduces impurity in trees." }, { "id": 43, "questionText": "What technique is used by Random Forest to combine multiple tree outputs?", "options": [ "Stacking", "Boosting", "Bagging", "Dropout" ], "correctAnswerIndex": 2, "explanation": "Random Forest is based on bagging — bootstrap aggregation of decision trees." }, { "id": 44, "questionText": "If Random Forest has too many trees, what is the likely result?", "options": [ "Accuracy decreases", "Overfitting increases", "Computation cost increases", "Model becomes unstable" ], "correctAnswerIndex": 2, "explanation": "After a certain number, adding trees only increases computation without much gain." }, { "id": 45, "questionText": "Which parameter limits how deep a tree can grow?", "options": [ "n_estimators", "max_depth", "criterion", "max_features" ], "correctAnswerIndex": 1, "explanation": "max_depth sets the maximum depth, controlling model complexity." }, { "id": 46, "questionText": "What is the main drawback of Random Forest in large datasets?", "options": [ "Low accuracy", "High computational cost", "High bias", "No randomness" ], "correctAnswerIndex": 1, "explanation": "Training hundreds of trees can be time-consuming for large datasets." }, { "id": 47, "questionText": "Which of these can Random Forest NOT handle directly?", "options": [ "Categorical data", "Sequential time dependencies", "Missing values", "Large datasets" ], "correctAnswerIndex": 1, "explanation": "Random Forest doesn’t model time dependencies, so it's not ideal for time series." 
}, { "id": 48, "questionText": "How is randomness introduced in Random Forest?", "options": [ "Bootstrap sampling and random feature selection", "Gradient descent", "Batch normalization", "Learning rate scheduling" ], "correctAnswerIndex": 0, "explanation": "Random Forest introduces randomness both in data and feature sampling." }, { "id": 49, "questionText": "What type of ensemble method is Random Forest?", "options": [ "Voting", "Bagging", "Boosting", "Stacking" ], "correctAnswerIndex": 1, "explanation": "Random Forest uses bagging (bootstrap aggregation) to train multiple trees." }, { "id": 50, "questionText": "What is the relationship between Decision Tree depth and overfitting?", "options": [ "Deeper trees tend to overfit", "Deeper trees always underfit", "Depth has no effect", "Shallow trees always overfit" ], "correctAnswerIndex": 0, "explanation": "Large tree depth can cause the model to memorize training data patterns." }, { "id": 51, "questionText": "What happens to the Random Forest model if trees are too shallow?", "options": [ "Model overfits", "Training time increases", "Variance increases", "Model underfits" ], "correctAnswerIndex": 3, "explanation": "Shallow trees can't capture complex data patterns." }, { "id": 52, "questionText": "Why does Random Forest not require feature scaling?", "options": [ "It normalizes automatically", "It splits based on thresholds, not distance", "It uses Euclidean distance", "It drops correlated features" ], "correctAnswerIndex": 1, "explanation": "Tree-based methods are invariant to feature scaling." }, { "id": 53, "questionText": "What happens if all trees are trained on identical bootstrap samples?", "options": [ "Higher accuracy", "No effect", "Reduced diversity", "Faster training" ], "correctAnswerIndex": 2, "explanation": "Lack of randomness among trees reduces ensemble benefit." 
}, { "id": 54, "questionText": "Which statement is TRUE about Random Forest?", "options": [ "It removes all bias", "It reduces bias but increases variance", "It increases both bias and variance", "It reduces variance but keeps bias low" ], "correctAnswerIndex": 3, "explanation": "Bagging in Random Forest reduces variance without significantly increasing bias." }, { "id": 55, "questionText": "In Random Forest, what does 'bootstrap=True' mean?", "options": [ "No randomness is applied", "Each tree skips feature selection", "All trees use the full dataset", "Each tree is trained on a random sample with replacement" ], "correctAnswerIndex": 3, "explanation": "Bootstrap sampling ensures each tree sees a slightly different dataset." }, { "id": 56, "questionText": "How is feature importance calculated in Random Forest?", "options": [ "Based on learning rate", "Using feature frequency", "By gradient descent", "Based on impurity reduction" ], "correctAnswerIndex": 3, "explanation": "It measures how much each feature decreases node impurity across all trees." }, { "id": 57, "questionText": "What is a typical hyperparameter tuning technique for Random Forest?", "options": [ "Grid Search or Random Search", "K-means", "Dropout", "Gradient Descent" ], "correctAnswerIndex": 0, "explanation": "Both Grid and Random Search are popular for hyperparameter tuning." }, { "id": 58, "questionText": "What happens if we set 'n_estimators' too high?", "options": [ "Lower accuracy", "Longer training time", "Underfitting", "Loss of randomness" ], "correctAnswerIndex": 1, "explanation": "Too many trees make training slow, though accuracy improvement becomes marginal." }, { "id": 59, "questionText": "How is Random Forest resistant to overfitting?", "options": [ "Using deeper trees", "Gradient correction", "Averaging independent trees", "Removing bias" ], "correctAnswerIndex": 2, "explanation": "Averaging many independent models cancels out noise and variance." 
}, { "id": 60, "questionText": "Which of the following best describes the Random Forest algorithm?", "options": [ "A single large decision tree", "Linear regression with trees", "Stacked boosting method", "Ensemble of decision trees trained on random subsets" ], "correctAnswerIndex": 3, "explanation": "Random Forest is an ensemble approach using bagging and random feature selection." }, { "id": 61, "questionText": "What is the main reason Random Forest works well even with noisy data?", "options": [ "It applies dropout regularization", "It removes noise automatically", "It memorizes noise across all trees", "It averages multiple trees to smooth out noise" ], "correctAnswerIndex": 3, "explanation": "Averaging predictions of multiple trees reduces the impact of noise in data." }, { "id": 62, "questionText": "Which technique helps Random Forest estimate generalization error without a validation set?", "options": [ "Cross-validation only", "Early stopping", "Out-of-Bag (OOB) estimation", "Dropout sampling" ], "correctAnswerIndex": 2, "explanation": "OOB samples are not seen during training, allowing internal error estimation." }, { "id": 63, "questionText": "What is the effect of increasing 'min_samples_split' too much?", "options": [ "Model may underfit", "Model may overfit", "Training crashes", "Bias becomes zero" ], "correctAnswerIndex": 0, "explanation": "Larger 'min_samples_split' prevents deeper splits, reducing learning capacity." }, { "id": 64, "questionText": "What is the typical output of Random Forest in binary classification?", "options": [ "Always continuous output", "Softmax score", "Only probability", "Majority class from all trees" ], "correctAnswerIndex": 3, "explanation": "Random Forest uses majority voting to decide final class." 
}, { "id": 65, "questionText": "In Random Forest, what happens if we disable bootstrap sampling?", "options": [ "All trees become identical", "Each tree will see full dataset", "Training becomes impossible", "Feature importance cannot be calculated" ], "correctAnswerIndex": 1, "explanation": "Setting bootstrap=False disables row sampling, so every tree is trained on the complete dataset; trees still differ because of random feature selection at each split." }, { "id": 66, "questionText": "Which Random Forest parameter controls how many features a single split considers?", "options": [ "min_samples_split", "max_depth", "max_features", "n_estimators" ], "correctAnswerIndex": 2, "explanation": "Randomly selecting only 'max_features' at each split ensures diversity." }, { "id": 67, "questionText": "Which situation is most ideal for using Random Forest?", "options": [ "Low-dimensional time series", "Fully labeled image datasets only", "Continuous text data", "High-dimensional structured tabular data" ], "correctAnswerIndex": 3, "explanation": "Random Forest is excellent for large structured numeric + categorical datasets." }, { "id": 68, "questionText": "How does Random Forest improve generalization?", "options": [ "By memorizing data patterns", "By deep pruning all trees", "By increasing bias", "By reducing variance using averaging" ], "correctAnswerIndex": 3, "explanation": "Averaging predictions from many uncorrelated trees reduces variance." }, { "id": 69, "questionText": "What is a scenario where Random Forest might perform poorly?", "options": [ "Large tabular dataset", "Handling missing values", "Highly sequential time-based data", "Text classification with manual encoding" ], "correctAnswerIndex": 2, "explanation": "Random Forest is not designed to understand sequential temporal dependencies." 
}, { "id": 70, "questionText": "What is the advantage of using 'max_samples' parameter in Random Forest?", "options": [ "It forces normalization", "It increases tree depth", "It controls how many samples each tree sees", "It controls feature count" ], "correctAnswerIndex": 2, "explanation": "max_samples limits data per tree to improve speed and variability." }, { "id": 71, "questionText": "Why is Random Forest called a 'bagging' technique?", "options": [ "It merges deep networks", "It sequentially boosts errors", "It uses bootstrap sampling + aggregation", "It stacks models layer by layer" ], "correctAnswerIndex": 2, "explanation": "Random Forest is based on Bagging = Bootstrap + Aggregation." }, { "id": 72, "questionText": "What is the role of 'n_jobs' parameter in Random Forest?", "options": [ "Controls parallel processing", "Controls noise injection", "Controls memory allocation", "Controls feature removal" ], "correctAnswerIndex": 0, "explanation": "n_jobs defines how many CPU cores to use in training." }, { "id": 73, "questionText": "What happens if trees in a Random Forest are highly correlated?", "options": [ "Bias becomes zero", "Performance decreases", "No effect", "Accuracy increases massively" ], "correctAnswerIndex": 1, "explanation": "Less diversity among trees means less benefit from ensemble averaging." }, { "id": 74, "questionText": "Why is Random Forest naturally resistant to overfitting?", "options": [ "Because it always uses shallow trees", "Because it restricts learning", "Because it averages predictions from multiple trees", "Because it limits depth" ], "correctAnswerIndex": 2, "explanation": "Averaging predictions reduces variance and overfitting." 
}, { "id": 75, "questionText": "What is the output of feature importance scores in Random Forest?", "options": [ "Relative importance values per feature", "Loss graph", "Class probability distribution", "Confusion matrix" ], "correctAnswerIndex": 0, "explanation": "Feature importance shows which features contribute most to splits." }, { "id": 76, "questionText": "Which of these indicates Random Forest overfitting?", "options": [ "High training accuracy, low test accuracy", "Slow training time only", "Equal train and test accuracy", "Low training accuracy, high test accuracy" ], "correctAnswerIndex": 0, "explanation": "Overfitting means model fits training well but generalizes poorly." }, { "id": 77, "questionText": "What is a good reason to increase 'min_samples_leaf'?", "options": [ "To reduce bias", "To force normalization", "To reduce overfitting", "To increase overfitting" ], "correctAnswerIndex": 2, "explanation": "Larger leaves generalize better by preventing overly specific splits." }, { "id": 78, "questionText": "Which Random Forest parameter can reduce model size and computation?", "options": [ "max_depth", "All of these", "n_estimators", "max_samples" ], "correctAnswerIndex": 1, "explanation": "Reducing number of trees, depth, or samples lowers computational load." }, { "id": 79, "questionText": "Which part of Random Forest helps most against overfitting?", "options": [ "Gradient correction", "Feature normalization", "Deep trees", "Bagging" ], "correctAnswerIndex": 3, "explanation": "Bagging reduces variance by training trees independently on random subsets." }, { "id": 80, "questionText": "What is the disadvantage of using very small 'max_depth' in Random Forest?", "options": [ "Unbalanced samples", "Memory leak", "Overfitting", "Underfitting" ], "correctAnswerIndex": 3, "explanation": "Very shallow trees cannot capture complex relationships." 
}, { "id": 81, "questionText": "How does Random Forest handle feature correlation?", "options": [ "It removes correlated features by default", "It may give correlated features lower importance", "It fails completely", "It merges correlated features" ], "correctAnswerIndex": 1, "explanation": "If two features are correlated, importance may be split between them." }, { "id": 82, "questionText": "What is 'Gini Importance' in Random Forest?", "options": [ "Metric to find best cluster", "Loss function for optimization", "Error on OOB samples", "Measure of how much a feature reduces impurity" ], "correctAnswerIndex": 3, "explanation": "It quantifies impurity reduction contributed by each feature." }, { "id": 83, "questionText": "Why is Random Forest not ideal for time-series forecasting?", "options": [ "It needs scaling", "It ignores temporal order", "It can't process numbers", "It overfits too much" ], "correctAnswerIndex": 1, "explanation": "Random Forest treats data as independent samples, ignoring sequence dependence." }, { "id": 84, "questionText": "What is a sign that 'n_estimators' should be increased?", "options": [ "Very fast training", "Perfect accuracy", "High test variance", "Low training accuracy only" ], "correctAnswerIndex": 2, "explanation": "Increasing trees reduces prediction variance and stabilizes model." }, { "id": 85, "questionText": "What is 'entropy' used for in Random Forest?", "options": [ "Learning rate control", "Feature normalization", "Pruning strategy", "Split quality measure" ], "correctAnswerIndex": 3, "explanation": "Entropy and Gini are purity measures used to decide best splits." 
}, { "id": 86, "questionText": "Which scenario may require reducing 'max_depth'?", "options": [ "When training time is extremely short", "When features are few", "When training accuracy is perfect but test accuracy is low", "When both accuracies are low" ], "correctAnswerIndex": 2, "explanation": "This indicates overfitting — reducing depth increases generalization." }, { "id": 87, "questionText": "What is one major strength of Random Forest?", "options": [ "Perfect for text generation", "Robust to noise and overfitting", "Predicts time trends", "Always fastest model" ], "correctAnswerIndex": 1, "explanation": "Random Forest is sturdy against noisy data due to ensemble averaging." }, { "id": 88, "questionText": "Increasing 'min_samples_leaf' will most likely:", "options": [ "Make model generalize better", "Decrease bias heavily", "Increase training variance", "Increase memorization" ], "correctAnswerIndex": 0, "explanation": "Larger leaves lead to simpler splits and better generalization." }, { "id": 89, "questionText": "Which metric is best for class imbalance evaluation in Random Forest?", "options": [ "MSE", "Recall / F1-score", "Accuracy only", "R-squared" ], "correctAnswerIndex": 1, "explanation": "F1 handles imbalanced data better by balancing precision and recall." }, { "id": 90, "questionText": "What happens if 'max_features' is too high?", "options": [ "Lower training accuracy", "Trees become more random", "Trees become more similar", "OOB error becomes undefined" ], "correctAnswerIndex": 2, "explanation": "More features → less randomness → higher correlation between trees." }, { "id": 91, "questionText": "Which combination may indicate optimal Random Forest tuning?", "options": [ "Low accuracy on both", "High train accuracy, high test accuracy", "Low train accuracy, high test accuracy", "High train accuracy, low test accuracy" ], "correctAnswerIndex": 1, "explanation": "This indicates low bias and low variance — a well-generalized model." 
}, { "id": 92, "questionText": "Why doesn’t Random Forest require much hyperparameter tuning compared to other models?", "options": [ "It ignores input data", "It is robust to overfitting and variance", "It always needs deep tuning", "It cannot be tuned" ], "correctAnswerIndex": 1, "explanation": "Random Forest naturally reduces variance and overfitting, making it less sensitive to hyperparameters." }, { "id": 93, "questionText": "What is the effect of increasing 'n_estimators' on OOB error?", "options": [ "OOB error is unaffected", "OOB error fluctuates randomly", "OOB error usually decreases and stabilizes", "OOB error increases" ], "correctAnswerIndex": 2, "explanation": "More trees provide a better estimate of error and reduce variance of predictions." }, { "id": 94, "questionText": "Which is true about correlated features in Random Forest?", "options": [ "Correlation is ignored completely", "Random Forest fails with correlation", "Correlated features are removed automatically", "Importance may be split among correlated features" ], "correctAnswerIndex": 3, "explanation": "When features are correlated, importance scores may be shared, lowering individual scores." }, { "id": 95, "questionText": "Random Forest is considered a black-box model because?", "options": [ "It outputs linear coefficients", "It uses shallow trees only", "It is hard to interpret individual predictions", "It has only one tree" ], "correctAnswerIndex": 2, "explanation": "The ensemble of many trees makes it difficult to trace exact reasoning for predictions." }, { "id": 96, "questionText": "Which is a good approach to reduce Random Forest computation on very large datasets?", "options": [ "Remove bagging", "Use all features", "Increase depth", "Reduce 'n_estimators' or use 'max_samples'" ], "correctAnswerIndex": 3, "explanation": "Fewer trees or smaller bootstrap samples lower computational cost." 
}, { "id": 97, "questionText": "Why is Random Forest more stable than a single Decision Tree?", "options": [ "Because it uses scaling", "Because it prunes all trees heavily", "Because it has only one tree", "Because predictions are averaged over many trees" ], "correctAnswerIndex": 3, "explanation": "Averaging reduces sensitivity to noise and variance in data." }, { "id": 98, "questionText": "What kind of bias-variance tradeoff does Random Forest achieve?", "options": [ "High bias, low variance", "Low bias, low variance", "High bias, high variance", "Low bias, high variance" ], "correctAnswerIndex": 1, "explanation": "Bagging ensures variance reduction while keeping bias relatively low." }, { "id": 99, "questionText": "Which Random Forest feature allows quick insight into feature relevance?", "options": [ "Feature importance scores", "OOB error", "min_samples_split", "max_depth" ], "correctAnswerIndex": 0, "explanation": "These scores help identify which features are most influential in predictions." }, { "id": 100, "questionText": "In Random Forest classification, which method aggregates the outputs of all trees?", "options": [ "Gradient boosting", "Weighted averaging", "Softmax", "Majority voting" ], "correctAnswerIndex": 3, "explanation": "Random Forest takes the class predicted by the majority of trees as the final output." } ] }