| { | |
| "title": "Random Forest Mastery: 100 MCQs", | |
| "description": "A comprehensive set of multiple-choice questions designed to test and deepen your understanding of Random Forest, covering fundamentals, parameters, ensemble concepts, and practical applications.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What is Random Forest primarily used for?", | |
| "options": [ | |
| "Only Clustering", | |
| "Only Time Series", | |
| "Only Image Processing", | |
| "Classification and Regression" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest is a versatile ensemble method used for both classification and regression tasks." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "Random Forest is an example of which type of learning?", | |
| "options": [ | |
| "Supervised Learning", | |
| "Unsupervised Learning", | |
| "Self-Supervised Learning", | |
| "Reinforcement Learning" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Random Forest is trained using labeled data, so it is supervised learning." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "What is the base algorithm used inside a Random Forest?", | |
| "options": [ | |
| "Linear Regression", | |
| "K-Means", | |
| "Decision Trees", | |
| "Neural Networks" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest builds multiple Decision Trees and combines them." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "Why is it called 'Random' Forest?", | |
| "options": [ | |
| "Because it gives random answers", | |
| "Because trees are random shapes", | |
| "Because it uses randomness in data and features", | |
| "Because it is used randomly" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest randomly selects data samples and features to build diverse trees." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "What does Random Forest reduce compared to a single Decision Tree?", | |
| "options": [ | |
| "Accuracy", | |
| "Computation Time", | |
| "Overfitting", | |
| "Data Size" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "By combining many trees, Random Forest reduces overfitting." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "What technique does Random Forest use to train different trees?", | |
| "options": [ | |
| "Gradient Descent", | |
| "Bootstrap Sampling", | |
| "Dropout", | |
| "Pooling" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest uses bootstrap sampling (bagging) to create different training subsets." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "Random Forest is an example of which ensemble method?", | |
| "options": [ | |
| "Boosting", | |
| "Stacking", | |
| "Bagging", | |
| "Reinforcement" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest is a bagging-based ensemble learning method." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "Which metric is commonly used to measure feature importance in Random Forest?", | |
| "options": [ | |
| "Euclidean Distance", | |
| "Entropy Loss", | |
| "Gini Importance", | |
| "Cosine Similarity" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Gini Impurity is used to decide splits, and feature importance is derived from it." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "What does each individual tree in a Random Forest output during classification?", | |
| "options": [ | |
| "A regression score only", | |
| "A class prediction", | |
| "A probability distribution", | |
| "A clustering label" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Each tree predicts a class, and Random Forest takes the majority vote." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "How does Random Forest make the final prediction in classification?", | |
| "options": [ | |
| "Majority voting", | |
| "Max pooling", | |
| "Averaging", | |
| "Sorting" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Random Forest predicts the class with the highest number of votes from trees." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "What happens if we increase the number of trees in Random Forest?", | |
| "options": [ | |
| "Accuracy usually improves", | |
| "Model becomes unstable", | |
| "Accuracy always decreases", | |
| "It deletes trees randomly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "More trees reduce variance and improve accuracy until a saturation point." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "What kind of data can Random Forest handle?", | |
| "options": [ | |
| "Only numerical", | |
| "Only text data", | |
| "Both categorical and numerical", | |
| "Only time series" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest works well with mixed data types." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Random Forest is robust to which problem?", | |
| "options": [ | |
| "Large memory usage", | |
| "Outliers", | |
| "Class imbalance", | |
| "Overfitting" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest reduces overfitting by combining multiple trees." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "What is the default criterion for splitting nodes in Random Forest classification?", | |
| "options": [ | |
| "MAE", | |
| "Gini Impurity", | |
| "MSE", | |
| "Cosine Distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Gini impurity is the default split criterion for classification." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "How does Random Forest handle missing values?", | |
| "options": [ | |
| "It ignores all rows", | |
| "It can handle them fairly well", | |
| "It crashes immediately", | |
| "It replaces them with zeros" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest can handle missing values better than many algorithms." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "What is the advantage of Random Forest over a single Decision Tree?", | |
| "options": [ | |
| "No training required", | |
| "Always 100% accuracy", | |
| "Higher accuracy", | |
| "Less training time" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest is more accurate than a single Decision Tree due to ensemble voting." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "What type of sampling is used in Random Forest?", | |
| "options": [ | |
| "Sequential sampling", | |
| "Sampling with replacement", | |
| "K-fold only", | |
| "Sampling without replacement" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest uses bootstrap sampling, which is sampling with replacement." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "What does each tree in Random Forest learn from?", | |
| "options": [ | |
| "Only 50% of all features", | |
| "Only one class of data", | |
| "A random subset of data", | |
| "The entire dataset" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Each tree is trained on different bootstrapped samples." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "What happens if the number of trees is too small?", | |
| "options": [ | |
| "Model becomes overconfident", | |
| "It increases memory usage too much", | |
| "It always overfits", | |
| "Model may underfit" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Too few trees may result in underfitting and poor accuracy." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "Random Forest reduces variance by?", | |
| "options": [ | |
| "Adding dropout", | |
| "Averaging multiple trees", | |
| "Increasing learning rate", | |
| "Minimizing entropy" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Averaging predictions reduces variance and improves generalization." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "What is the method used to combine predictions in Random Forest?", | |
| "options": [ | |
| "Majority voting", | |
| "Stacking", | |
| "Gradient descent", | |
| "Concatenation" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Classification is done using majority vote." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "What happens during training if two trees see different features?", | |
| "options": [ | |
| "They predict randomly", | |
| "They become identical", | |
| "They learn different patterns", | |
| "They crash" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Feature randomness ensures diverse learning across trees." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "Is Random Forest sensitive to feature scaling?", | |
| "options": [ | |
| "Yes", | |
| "Only for categorical features", | |
| "Only for small datasets", | |
| "No" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest does not require normalization or scaling." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Random Forest internally uses how many Decision Trees?", | |
| "options": [ | |
| "Based on dataset size", | |
| "User-defined number", | |
| "Exactly 10", | |
| "Always 1" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "The number of trees is set by the user using the 'n_estimators' parameter." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Random Forest works well when the dataset is?", | |
| "options": [ | |
| "Only with time series", | |
| "Large with many features", | |
| "Only with text data", | |
| "Very small only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest performs well with high-dimensional and large datasets." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "What is the output of Random Forest for binary classification?", | |
| "options": [ | |
| "Probability only", | |
| "Only 1", | |
| "Only 0", | |
| "0 or 1" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "The final output is a class label like 0 or 1." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "What is 'n_estimators' in Random Forest?", | |
| "options": [ | |
| "Number of features", | |
| "Number of layers", | |
| "Number of epochs", | |
| "Number of trees" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "'n_estimators' defines how many Decision Trees to train." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "What happens if all trees in Random Forest agree?", | |
| "options": [ | |
| "Model crashes", | |
| "Accuracy drops", | |
| "High confidence in prediction", | |
| "It becomes regression" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "More agreement among trees increases prediction confidence." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Which parameter controls the depth of trees in Random Forest?", | |
| "options": [ | |
| "n_estimators", | |
| "learning_rate", | |
| "max_depth", | |
| "n_clusters" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "max_depth controls how deep each tree can grow." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "What is a potential drawback of Random Forest?", | |
| "options": [ | |
| "Cannot classify data", | |
| "Needs feature scaling", | |
| "High memory usage", | |
| "Always underfits" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Training many trees can consume large memory and computation." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "What is the main reason Random Forest performs well compared to a single tree?", | |
| "options": [ | |
| "It removes features randomly", | |
| "It increases bias intentionally", | |
| "It uses deep neural layers", | |
| "It averages multiple trees to reduce variance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Averaging multiple independent trees stabilizes the predictions and lowers overfitting." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "What does the term 'out-of-bag' (OOB) error mean in Random Forest?", | |
| "options": [ | |
| "Training error on all data", | |
| "Error on random subsets", | |
| "Loss on test set only", | |
| "Error on unseen samples not used in training trees" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "OOB error estimates model performance using samples not included in the bootstrap subset." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "How does Random Forest ensure diversity among trees?", | |
| "options": [ | |
| "By pruning all trees equally", | |
| "Using same random seed", | |
| "Random sampling of data and features", | |
| "Training all trees on same data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Bootstrapping and random feature selection introduce variation between trees." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Which of the following parameters controls the number of features considered for splitting?", | |
| "options": [ | |
| "min_samples_split", | |
| "max_features", | |
| "n_estimators", | |
| "max_depth" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "max_features limits how many features are chosen at each split, encouraging diversity." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "What happens if 'max_features' is set to 1 in a Random Forest?", | |
| "options": [ | |
| "Each tree becomes highly decorrelated", | |
| "All trees are identical", | |
| "Model becomes identical to a single tree", | |
| "Training stops early" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "When only one feature is chosen at each split, trees are very different, improving ensemble strength." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Which evaluation metric is best for imbalanced classification using Random Forest?", | |
| "options": [ | |
| "Accuracy", | |
| "F1-score", | |
| "MSE", | |
| "R²" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "F1-score balances precision and recall, making it ideal for imbalanced datasets." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "Random Forest handles overfitting better than a single decision tree mainly due to?", | |
| "options": [ | |
| "Ensemble averaging", | |
| "Deep pruning", | |
| "More bias", | |
| "Gradient descent" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Averaging the outputs of multiple uncorrelated trees reduces overfitting." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "What is the typical relationship between bias and variance in Random Forest?", | |
| "options": [ | |
| "High bias, low variance", | |
| "Low bias, high variance", | |
| "High bias, high variance", | |
| "Low bias, low variance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest balances both bias and variance well due to its ensemble structure." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "In Random Forest, which trees are used to predict a test sample?", | |
| "options": [ | |
| "Random subset of trees", | |
| "Only first tree", | |
| "All trees in the ensemble", | |
| "Last tree only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Each tree contributes to prediction, and results are aggregated by majority voting." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "What is the purpose of 'random_state' in Random Forest?", | |
| "options": [ | |
| "Increasing randomness", | |
| "Feature selection", | |
| "Reproducibility", | |
| "Performance improvement" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "random_state ensures the same random sampling for consistent results." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "What is the role of 'min_samples_split' in Random Forest?", | |
| "options": [ | |
| "Number of bootstrap samples", | |
| "Total number of features used", | |
| "Maximum leaf nodes allowed", | |
| "Minimum number of samples required to split an internal node" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "It prevents splits when a node has too few samples, reducing overfitting." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "What is feature importance in Random Forest?", | |
| "options": [ | |
| "A pruning factor", | |
| "A clustering metric", | |
| "A measure of data imbalance", | |
| "A score showing how useful a feature is for prediction" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Feature importance reflects how much each feature reduces impurity in trees." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "What technique is used by Random Forest to combine multiple tree outputs?", | |
| "options": [ | |
| "Stacking", | |
| "Boosting", | |
| "Bagging", | |
| "Dropout" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest is based on bagging — bootstrap aggregation of decision trees." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "If Random Forest has too many trees, what is the likely result?", | |
| "options": [ | |
| "Accuracy decreases", | |
| "Overfitting increases", | |
| "Computation cost increases", | |
| "Model becomes unstable" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "After a certain number, adding trees only increases computation without much gain." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "Which parameter limits how deep a tree can grow?", | |
| "options": [ | |
| "n_estimators", | |
| "max_depth", | |
| "criterion", | |
| "max_features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "max_depth sets the maximum depth, controlling model complexity." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "What is the main drawback of Random Forest in large datasets?", | |
| "options": [ | |
| "Low accuracy", | |
| "High computational cost", | |
| "High bias", | |
| "No randomness" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Training hundreds of trees can be time-consuming for large datasets." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "Which of these can Random Forest NOT handle directly?", | |
| "options": [ | |
| "Categorical data", | |
| "Sequential time dependencies", | |
| "Missing values", | |
| "Large datasets" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest doesn’t model time dependencies, so it's not ideal for time series." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "How is randomness introduced in Random Forest?", | |
| "options": [ | |
| "Bootstrap sampling and random feature selection", | |
| "Gradient descent", | |
| "Batch normalization", | |
| "Learning rate scheduling" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Random Forest introduces randomness both in data and feature sampling." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "What type of ensemble method is Random Forest?", | |
| "options": [ | |
| "Voting", | |
| "Bagging", | |
| "Boosting", | |
| "Stacking" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest uses bagging (bootstrap aggregation) to train multiple trees." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "What is the relationship between Decision Tree depth and overfitting?", | |
| "options": [ | |
| "Deeper trees tend to overfit", | |
| "Deeper trees always underfit", | |
| "Depth has no effect", | |
| "Shallow trees always overfit" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Large tree depth can cause the model to memorize training data patterns." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "What happens to the Random Forest model if trees are too shallow?", | |
| "options": [ | |
| "Model overfits", | |
| "Training time increases", | |
| "Variance increases", | |
| "Model underfits" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Shallow trees can't capture complex data patterns." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Why does Random Forest not require feature scaling?", | |
| "options": [ | |
| "It normalizes automatically", | |
| "It splits based on thresholds, not distance", | |
| "It uses Euclidean distance", | |
| "It drops correlated features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Tree-based methods are invariant to feature scaling." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "What happens if all trees are trained on identical bootstrap samples?", | |
| "options": [ | |
| "Higher accuracy", | |
| "No effect", | |
| "Reduced diversity", | |
| "Faster training" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Lack of randomness among trees reduces ensemble benefit." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "Which statement is TRUE about Random Forest?", | |
| "options": [ | |
| "It removes all bias", | |
| "It reduces bias but increases variance", | |
| "It increases both bias and variance", | |
| "It reduces variance but keeps bias low" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Bagging in Random Forest reduces variance without significantly increasing bias." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "In Random Forest, what does 'bootstrap=True' mean?", | |
| "options": [ | |
| "No randomness is applied", | |
| "Each tree skips feature selection", | |
| "All trees use the full dataset", | |
| "Each tree is trained on a random sample with replacement" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Bootstrap sampling ensures each tree sees a slightly different dataset." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "How is feature importance calculated in Random Forest?", | |
| "options": [ | |
| "Based on learning rate", | |
| "Using feature frequency", | |
| "By gradient descent", | |
| "Based on impurity reduction" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "It measures how much each feature decreases node impurity across all trees." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "What is a typical hyperparameter tuning technique for Random Forest?", | |
| "options": [ | |
| "Grid Search or Random Search", | |
| "K-means", | |
| "Dropout", | |
| "Gradient Descent" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Both Grid and Random Search are popular for hyperparameter tuning." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "What happens if we set 'n_estimators' too high?", | |
| "options": [ | |
| "Lower accuracy", | |
| "Longer training time", | |
| "Underfitting", | |
| "Loss of randomness" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Too many trees make training slow, though accuracy improvement becomes marginal." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "How is Random Forest resistant to overfitting?", | |
| "options": [ | |
| "Using deeper trees", | |
| "Gradient correction", | |
| "Averaging independent trees", | |
| "Removing bias" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Averaging many independent models cancels out noise and variance." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "Which of the following best describes the Random Forest algorithm?", | |
| "options": [ | |
| "A single large decision tree", | |
| "Linear regression with trees", | |
| "Stacked boosting method", | |
| "Ensemble of decision trees trained on random subsets" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest is an ensemble approach using bagging and random feature selection." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "What is the main reason Random Forest works well even with noisy data?", | |
| "options": [ | |
| "It applies dropout regularization", | |
| "It removes noise automatically", | |
| "It memorizes noise across all trees", | |
| "It averages multiple trees to smooth out noise" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Averaging predictions of multiple trees reduces the impact of noise in data." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Which technique helps Random Forest estimate generalization error without a validation set?", | |
| "options": [ | |
| "Cross-validation only", | |
| "Early stopping", | |
| "Out-of-Bag (OOB) estimation", | |
| "Dropout sampling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "OOB samples are not seen during training, allowing internal error estimation." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "What is the effect of increasing 'min_samples_split' too much?", | |
| "options": [ | |
| "Model may underfit", | |
| "Model may overfit", | |
| "Training crashes", | |
| "Bias becomes zero" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Larger 'min_samples_split' prevents deeper splits, reducing learning capacity." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "What is the typical output of Random Forest in binary classification?", | |
| "options": [ | |
| "Always continuous output", | |
| "Softmax score", | |
| "Only probability", | |
| "Majority class from all trees" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest uses majority voting to decide final class." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "In Random Forest, what happens if we disable bootstrap sampling?", | |
| "options": [ | |
| "All trees become identical", | |
| "Each tree will see full dataset", | |
| "Training becomes impossible", | |
| "Feature importance cannot be calculated" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "bootstrap=False means no sampling, trees are trained on complete dataset." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Which Random Forest parameter controls how many features a single split considers?", | |
| "options": [ | |
| "min_samples_split", | |
| "max_depth", | |
| "max_features", | |
| "n_estimators" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Randomly selecting only 'max_features' at each split ensures diversity." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Which situation is most ideal for using Random Forest?", | |
| "options": [ | |
| "Low-dimensional time series", | |
| "Fully labeled image datasets only", | |
| "Continuous text data", | |
| "High-dimensional structured tabular data" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest is excellent for large structured numeric + categorical datasets." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "How does Random Forest improve generalization?", | |
| "options": [ | |
| "By memorizing data patterns", | |
| "By deep pruning all trees", | |
| "By increasing bias", | |
| "By reducing variance using averaging" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Averaging predictions from many uncorrelated trees reduces variance." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "What is a scenario where Random Forest might perform poorly?", | |
| "options": [ | |
| "Large tabular dataset", | |
| "Handling missing values", | |
| "Highly sequential time-based data", | |
| "Text classification with manual encoding" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest is not designed to understand sequential temporal dependencies." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "What is the advantage of using 'max_samples' parameter in Random Forest?", | |
| "options": [ | |
| "It forces normalization", | |
| "It increases tree depth", | |
| "It controls how many samples each tree sees", | |
| "It controls feature count" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "max_samples limits data per tree to improve speed and variability." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "Why is Random Forest called a 'bagging' technique?", | |
| "options": [ | |
| "It merges deep networks", | |
| "It sequentially boosts errors", | |
| "It uses bootstrap sampling + aggregation", | |
| "It stacks models layer by layer" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Random Forest is based on Bagging = Bootstrap + Aggregation." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "What is the role of 'n_jobs' parameter in Random Forest?", | |
| "options": [ | |
| "Controls parallel processing", | |
| "Controls noise injection", | |
| "Controls memory allocation", | |
| "Controls feature removal" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "n_jobs defines how many CPU cores to use in training." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "What happens if trees in a Random Forest are highly correlated?", | |
| "options": [ | |
| "Bias becomes zero", | |
| "Performance decreases", | |
| "No effect", | |
| "Accuracy increases massively" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Less diversity among trees means less benefit from ensemble averaging." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Why is Random Forest naturally resistant to overfitting?", | |
| "options": [ | |
| "Because it always uses shallow trees", | |
| "Because it restricts learning", | |
| "Because it averages predictions from multiple trees", | |
| "Because it limits depth" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Averaging predictions reduces variance and overfitting." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "What is the output of feature importance scores in Random Forest?", | |
| "options": [ | |
| "Relative importance values per feature", | |
| "Loss graph", | |
| "Class probability distribution", | |
| "Confusion matrix" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Feature importance shows which features contribute most to splits." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Which of these indicates Random Forest overfitting?", | |
| "options": [ | |
| "High training accuracy, low test accuracy", | |
| "Slow training time only", | |
| "Equal train and test accuracy", | |
| "Low training accuracy, high test accuracy" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Overfitting means model fits training well but generalizes poorly." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "What is a good reason to increase 'min_samples_leaf'?", | |
| "options": [ | |
| "To reduce bias", | |
| "To force normalization", | |
| "To reduce overfitting", | |
| "To increase overfitting" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Larger leaves generalize better by preventing overly specific splits." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Which Random Forest parameter can reduce model size and computation?", | |
| "options": [ | |
| "max_depth", | |
| "All of the above", | |
| "n_estimators", | |
| "max_samples" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Reducing number of trees, depth, or samples lowers computational load." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Which part of Random Forest helps most against overfitting?", | |
| "options": [ | |
| "Gradient correction", | |
| "Feature normalization", | |
| "Deep trees", | |
| "Bagging" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Bagging reduces variance by training trees independently on random subsets." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "What is the disadvantage of using very small 'max_depth' in Random Forest?", | |
| "options": [ | |
| "Unbalanced samples", | |
| "Memory leak", | |
| "Overfitting", | |
| "Underfitting" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Very shallow trees cannot capture complex relationships." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "How does Random Forest handle feature correlation?", | |
| "options": [ | |
| "It removes correlated features by default", | |
| "It may give correlated features lower importance", | |
| "It fails completely", | |
| "It merges correlated features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "If two features are correlated, importance may be split between them." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "What is 'Gini Importance' in Random Forest?", | |
| "options": [ | |
| "Metric to find best cluster", | |
| "Loss function for optimization", | |
| "Error on OOB samples", | |
| "Measure of how much a feature reduces impurity" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "It quantifies impurity reduction contributed by each feature." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Why is Random Forest not ideal for time-series forecasting?", | |
| "options": [ | |
| "It needs scaling", | |
| "It ignores temporal order", | |
| "It can't process numbers", | |
| "It overfits too much" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest treats data as independent samples, ignoring sequence dependence." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "What is a sign that 'n_estimators' should be increased?", | |
| "options": [ | |
| "Very fast training", | |
| "Perfect accuracy", | |
| "High test variance", | |
| "Low training accuracy only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Increasing trees reduces prediction variance and stabilizes model." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "What is 'entropy' used for in Random Forest?", | |
| "options": [ | |
| "Learning rate control", | |
| "Feature normalization", | |
| "Pruning strategy", | |
| "Split quality measure" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Entropy and Gini are purity measures used to decide best splits." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "Which scenario may require reducing 'max_depth'?", | |
| "options": [ | |
| "When training time is extremely short", | |
| "When features are few", | |
| "When training accuracy is perfect but test accuracy is low", | |
| "When both accuracies are low" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "This indicates overfitting — reducing depth increases generalization." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "What is one major strength of Random Forest?", | |
| "options": [ | |
| "Perfect for text generation", | |
| "Robust to noise and overfitting", | |
| "Predicts time trends", | |
| "Always fastest model" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest is sturdy against noisy data due to ensemble averaging." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Increasing 'min_samples_leaf' will most likely:", | |
| "options": [ | |
| "Make model generalize better", | |
| "Decrease bias heavily", | |
| "Increase training variance", | |
| "Increase memorization" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Larger leaves lead to simpler splits and better generalization." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Which metric is best for class imbalance evaluation in Random Forest?", | |
| "options": [ | |
| "MSE", | |
| "Recall / F1-score", | |
| "Accuracy only", | |
| "R-squared" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "F1 handles imbalanced data better by balancing precision and recall." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "What happens if 'max_features' is too high?", | |
| "options": [ | |
| "Lower training accuracy", | |
| "Trees become more random", | |
| "Trees become more similar", | |
| "OOB error becomes undefined" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "More features → less randomness → higher correlation between trees." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Which combination may indicate optimal Random Forest tuning?", | |
| "options": [ | |
| "Low accuracy on both", | |
| "High train accuracy, high test accuracy", | |
| "Low train accuracy, high test accuracy", | |
| "High train accuracy, low test accuracy" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "This indicates low bias and low variance — a well-generalized model." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Why doesn’t Random Forest require much hyperparameter tuning compared to other models?", | |
| "options": [ | |
| "It ignores input data", | |
| "It is robust to overfitting and variance", | |
| "It always needs deep tuning", | |
| "It cannot be tuned" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest naturally reduces variance and overfitting, making it less sensitive to hyperparameters." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "What is the effect of increasing 'n_estimators' on OOB error?", | |
| "options": [ | |
| "OOB error is unaffected", | |
| "OOB error fluctuates randomly", | |
| "OOB error usually decreases and stabilizes", | |
| "OOB error increases" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "More trees provide a better estimate of error and reduce variance of predictions." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Which is true about correlated features in Random Forest?", | |
| "options": [ | |
| "Correlation is ignored completely", | |
| "Random Forest fails with correlation", | |
| "Correlated features are removed automatically", | |
| "Importance may be split among correlated features" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "When features are correlated, importance scores may be shared, lowering individual scores." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Random Forest is considered a black-box model because?", | |
| "options": [ | |
| "It outputs linear coefficients", | |
| "It uses shallow trees only", | |
| "It is hard to interpret individual predictions", | |
| "It has only one tree" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "The ensemble of many trees makes it difficult to trace exact reasoning for predictions." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Which is a good approach to reduce Random Forest computation on very large datasets?", | |
| "options": [ | |
| "Remove bagging", | |
| "Use all features", | |
| "Increase depth", | |
| "Reduce 'n_estimators' or use 'max_samples'" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Fewer trees or smaller bootstrap samples lower computational cost." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Why is Random Forest more stable than a single Decision Tree?", | |
| "options": [ | |
| "Because it uses scaling", | |
| "Because it prunes all trees heavily", | |
| "Because it has only one tree", | |
| "Because predictions are averaged over many trees" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Averaging reduces sensitivity to noise and variance in data." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "What kind of bias-variance tradeoff does Random Forest achieve?", | |
| "options": [ | |
| "High bias, low variance", | |
| "Low bias, low variance", | |
| "High bias, high variance", | |
| "Low bias, high variance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Bagging ensures variance reduction while keeping bias relatively low." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Which Random Forest feature allows quick insight into feature relevance?", | |
| "options": [ | |
| "Feature importance scores", | |
| "OOB error", | |
| "min_samples_split", | |
| "max_depth" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "These scores help identify which features are most influential in predictions." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "In Random Forest classification, which method aggregates the outputs of all trees?", | |
| "options": [ | |
| "Gradient boosting", | |
| "Weighted averaging", | |
| "Softmax", | |
| "Majority voting" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Random Forest takes the class predicted by the majority of trees as the final output." | |
| } | |
| ] | |
| } | |