{
"title": "Random Forest Mastery: 100 MCQs",
"description": "A comprehensive set of multiple-choice questions designed to test and deepen your understanding of Random Forest, covering fundamentals, parameters, ensemble concepts, and practical applications.",
"questions": [
{
"id": 1,
"questionText": "What is Random Forest primarily used for?",
"options": [
"Only Clustering",
"Only Time Series",
"Only Image Processing",
"Classification and Regression"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is a versatile ensemble method used for both classification and regression tasks."
},
{
"id": 2,
"questionText": "Random Forest is an example of which type of learning?",
"options": [
"Supervised Learning",
"Unsupervised Learning",
"Self-Supervised Learning",
"Reinforcement Learning"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest is trained using labeled data, so it is supervised learning."
},
{
"id": 3,
"questionText": "What is the base algorithm used inside a Random Forest?",
"options": [
"Linear Regression",
"K-Means",
"Decision Trees",
"Neural Networks"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest builds multiple Decision Trees and combines them."
},
{
"id": 4,
"questionText": "Why is it called 'Random' Forest?",
"options": [
"Because it gives random answers",
"Because trees are random shapes",
"Because it uses randomness in data and features",
"Because it is used randomly"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest randomly selects data samples and features to build diverse trees."
},
{
"id": 5,
"questionText": "What does Random Forest reduce compared to a single Decision Tree?",
"options": [
"Accuracy",
"Computation Time",
"Overfitting",
"Data Size"
],
"correctAnswerIndex": 2,
"explanation": "By combining many trees, Random Forest reduces overfitting."
},
{
"id": 6,
"questionText": "What technique does Random Forest use to train different trees?",
"options": [
"Gradient Descent",
"Bootstrap Sampling",
"Dropout",
"Pooling"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bootstrap sampling (bagging) to create different training subsets."
},
{
"id": 7,
"questionText": "Random Forest is an example of which ensemble method?",
"options": [
"Boosting",
"Stacking",
"Bagging",
"Reinforcement"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is a bagging-based ensemble learning method."
},
{
"id": 8,
"questionText": "Which metric is commonly used to measure feature importance in Random Forest?",
"options": [
"Euclidean Distance",
"Entropy Loss",
"Gini Importance",
"Cosine Similarity"
],
"correctAnswerIndex": 2,
"explanation": "Gini Impurity is used to decide splits, and feature importance is derived from it."
},
{
"id": 9,
"questionText": "What does each individual tree in a Random Forest output during classification?",
"options": [
"A regression score only",
"A class prediction",
"A probability distribution",
"A clustering label"
],
"correctAnswerIndex": 1,
"explanation": "Each tree predicts a class, and Random Forest takes the majority vote."
},
{
"id": 10,
"questionText": "How does Random Forest make the final prediction in classification?",
"options": [
"Majority voting",
"Max pooling",
"Averaging",
"Sorting"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest predicts the class with the highest number of votes from trees."
},
{
"id": 11,
"questionText": "What happens if we increase the number of trees in Random Forest?",
"options": [
"Accuracy usually improves",
"Model becomes unstable",
"Accuracy always decreases",
"It deletes trees randomly"
],
"correctAnswerIndex": 0,
"explanation": "More trees reduce variance and improve accuracy until a saturation point."
},
{
"id": 12,
"questionText": "What kind of data can Random Forest handle?",
"options": [
"Only numerical",
"Only text data",
"Both categorical and numerical",
"Only time series"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest works well with mixed data types."
},
{
"id": 13,
"questionText": "Random Forest is robust to which problem?",
"options": [
"Large memory usage",
"Outliers",
"Class imbalance",
"Overfitting"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest reduces overfitting by combining multiple trees."
},
{
"id": 14,
"questionText": "What is the default criterion for splitting nodes in Random Forest classification?",
"options": [
"MAE",
"Gini Impurity",
"MSE",
"Cosine Distance"
],
"correctAnswerIndex": 1,
"explanation": "Gini impurity is the default split criterion for classification."
},
{
"id": 15,
"questionText": "How does Random Forest handle missing values?",
"options": [
"It ignores all rows",
"It can handle them fairly well",
"It crashes immediately",
"It replaces them with zeros"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest can handle missing values better than many algorithms."
},
{
"id": 16,
"questionText": "What is the advantage of Random Forest over a single Decision Tree?",
"options": [
"No training required",
"Always 100% accuracy",
"Higher accuracy",
"Less training time"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is more accurate than a single Decision Tree due to ensemble voting."
},
{
"id": 17,
"questionText": "What type of sampling is used in Random Forest?",
"options": [
"Sequential sampling",
"Sampling with replacement",
"K-fold only",
"Sampling without replacement"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bootstrap sampling, which is sampling with replacement."
},
{
"id": 18,
"questionText": "What does each tree in Random Forest learn from?",
"options": [
"Only 50% of all features",
"Only one class of data",
"A random subset of data",
"The entire dataset"
],
"correctAnswerIndex": 2,
"explanation": "Each tree is trained on different bootstrapped samples."
},
{
"id": 19,
"questionText": "What happens if the number of trees is too small?",
"options": [
"Model becomes overconfident",
"It increases memory usage too much",
"It always overfits",
"Model may underfit"
],
"correctAnswerIndex": 3,
"explanation": "Too few trees may result in underfitting and poor accuracy."
},
{
"id": 20,
"questionText": "Random Forest reduces variance by?",
"options": [
"Adding dropout",
"Averaging multiple trees",
"Increasing learning rate",
"Minimizing entropy"
],
"correctAnswerIndex": 1,
"explanation": "Averaging predictions reduces variance and improves generalization."
},
{
"id": 21,
"questionText": "What is the method used to combine predictions in Random Forest?",
"options": [
"Majority voting",
"Stacking",
"Gradient descent",
"Concatenation"
],
"correctAnswerIndex": 0,
"explanation": "Classification is done using majority vote."
},
{
"id": 22,
"questionText": "What happens during training if two trees see different features?",
"options": [
"They predict randomly",
"They become identical",
"They learn different patterns",
"They crash"
],
"correctAnswerIndex": 2,
"explanation": "Feature randomness ensures diverse learning across trees."
},
{
"id": 23,
"questionText": "Is Random Forest sensitive to feature scaling?",
"options": [
"Yes",
"Only for categorical features",
"Only for small datasets",
"No"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest does not require normalization or scaling."
},
{
"id": 24,
"questionText": "Random Forest internally uses how many Decision Trees?",
"options": [
"Based on dataset size",
"User-defined number",
"Exactly 10",
"Always 1"
],
"correctAnswerIndex": 1,
"explanation": "The number of trees is set by the user using the 'n_estimators' parameter."
},
{
"id": 25,
"questionText": "Random Forest works well when the dataset is?",
"options": [
"Only with time series",
"Large with many features",
"Only with text data",
"Very small only"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest performs well with high-dimensional and large datasets."
},
{
"id": 26,
"questionText": "What is the output of Random Forest for binary classification?",
"options": [
"Probability only",
"Only 1",
"Only 0",
"0 or 1"
],
"correctAnswerIndex": 3,
"explanation": "The final output is a class label like 0 or 1."
},
{
"id": 27,
"questionText": "What is 'n_estimators' in Random Forest?",
"options": [
"Number of features",
"Number of layers",
"Number of epochs",
"Number of trees"
],
"correctAnswerIndex": 3,
"explanation": "'n_estimators' defines how many Decision Trees to train."
},
{
"id": 28,
"questionText": "What happens if all trees in Random Forest agree?",
"options": [
"Model crashes",
"Accuracy drops",
"High confidence in prediction",
"It becomes regression"
],
"correctAnswerIndex": 2,
"explanation": "More agreement among trees increases prediction confidence."
},
{
"id": 29,
"questionText": "Which parameter controls the depth of trees in Random Forest?",
"options": [
"n_estimators",
"learning_rate",
"max_depth",
"n_clusters"
],
"correctAnswerIndex": 2,
"explanation": "max_depth controls how deep each tree can grow."
},
{
"id": 30,
"questionText": "What is a potential drawback of Random Forest?",
"options": [
"Cannot classify data",
"Needs feature scaling",
"High memory usage",
"Always underfits"
],
"correctAnswerIndex": 2,
"explanation": "Training many trees can consume large memory and computation."
},
{
"id": 31,
"questionText": "What is the main reason Random Forest performs well compared to a single tree?",
"options": [
"It removes features randomly",
"It increases bias intentionally",
"It uses deep neural layers",
"It averages multiple trees to reduce variance"
],
"correctAnswerIndex": 3,
"explanation": "Averaging multiple independent trees stabilizes the predictions and lowers overfitting."
},
{
"id": 32,
"questionText": "What does the term 'out-of-bag' (OOB) error mean in Random Forest?",
"options": [
"Training error on all data",
"Error on random subsets",
"Loss on test set only",
"Error on unseen samples not used in training trees"
],
"correctAnswerIndex": 3,
"explanation": "OOB error estimates model performance using samples not included in the bootstrap subset."
},
{
"id": 33,
"questionText": "How does Random Forest ensure diversity among trees?",
"options": [
"By pruning all trees equally",
"Using same random seed",
"Random sampling of data and features",
"Training all trees on same data"
],
"correctAnswerIndex": 2,
"explanation": "Bootstrapping and random feature selection introduce variation between trees."
},
{
"id": 34,
"questionText": "Which of the following parameters controls the number of features considered for splitting?",
"options": [
"min_samples_split",
"max_features",
"n_estimators",
"max_depth"
],
"correctAnswerIndex": 1,
"explanation": "max_features limits how many features are chosen at each split, encouraging diversity."
},
{
"id": 35,
"questionText": "What happens if 'max_features' is set to 1 in a Random Forest?",
"options": [
"Each tree becomes highly decorrelated",
"All trees are identical",
"Model becomes identical to a single tree",
"Training stops early"
],
"correctAnswerIndex": 0,
"explanation": "When only one feature is chosen at each split, trees are very different, improving ensemble strength."
},
{
"id": 36,
"questionText": "Which evaluation metric is best for imbalanced classification using Random Forest?",
"options": [
"Accuracy",
"F1-score",
"MSE",
"R²"
],
"correctAnswerIndex": 1,
"explanation": "F1-score balances precision and recall, making it ideal for imbalanced datasets."
},
{
"id": 37,
"questionText": "Random Forest handles overfitting better than a single decision tree mainly due to?",
"options": [
"Ensemble averaging",
"Deep pruning",
"More bias",
"Gradient descent"
],
"correctAnswerIndex": 0,
"explanation": "Averaging the outputs of multiple uncorrelated trees reduces overfitting."
},
{
"id": 38,
"questionText": "What is the typical relationship between bias and variance in Random Forest?",
"options": [
"High bias, low variance",
"Low bias, high variance",
"High bias, high variance",
"Low bias, low variance"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest balances both bias and variance well due to its ensemble structure."
},
{
"id": 39,
"questionText": "In Random Forest, which trees are used to predict a test sample?",
"options": [
"Random subset of trees",
"Only first tree",
"All trees in the ensemble",
"Last tree only"
],
"correctAnswerIndex": 2,
"explanation": "Each tree contributes to prediction, and results are aggregated by majority voting."
},
{
"id": 40,
"questionText": "What is the purpose of 'random_state' in Random Forest?",
"options": [
"Increasing randomness",
"Feature selection",
"Reproducibility",
"Performance improvement"
],
"correctAnswerIndex": 2,
"explanation": "random_state ensures the same random sampling for consistent results."
},
{
"id": 41,
"questionText": "What is the role of 'min_samples_split' in Random Forest?",
"options": [
"Number of bootstrap samples",
"Total number of features used",
"Maximum leaf nodes allowed",
"Minimum number of samples required to split an internal node"
],
"correctAnswerIndex": 3,
"explanation": "It prevents splits when a node has too few samples, reducing overfitting."
},
{
"id": 42,
"questionText": "What is feature importance in Random Forest?",
"options": [
"A pruning factor",
"A clustering metric",
"A measure of data imbalance",
"A score showing how useful a feature is for prediction"
],
"correctAnswerIndex": 3,
"explanation": "Feature importance reflects how much each feature reduces impurity in trees."
},
{
"id": 43,
"questionText": "What technique is used by Random Forest to combine multiple tree outputs?",
"options": [
"Stacking",
"Boosting",
"Bagging",
"Dropout"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is based on bagging — bootstrap aggregation of decision trees."
},
{
"id": 44,
"questionText": "If Random Forest has too many trees, what is the likely result?",
"options": [
"Accuracy decreases",
"Overfitting increases",
"Computation cost increases",
"Model becomes unstable"
],
"correctAnswerIndex": 2,
"explanation": "After a certain number, adding trees only increases computation without much gain."
},
{
"id": 45,
"questionText": "Which parameter limits how deep a tree can grow?",
"options": [
"n_estimators",
"max_depth",
"criterion",
"max_features"
],
"correctAnswerIndex": 1,
"explanation": "max_depth sets the maximum depth, controlling model complexity."
},
{
"id": 46,
"questionText": "What is the main drawback of Random Forest in large datasets?",
"options": [
"Low accuracy",
"High computational cost",
"High bias",
"No randomness"
],
"correctAnswerIndex": 1,
"explanation": "Training hundreds of trees can be time-consuming for large datasets."
},
{
"id": 47,
"questionText": "Which of these can Random Forest NOT handle directly?",
"options": [
"Categorical data",
"Sequential time dependencies",
"Missing values",
"Large datasets"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest doesn’t model time dependencies, so it's not ideal for time series."
},
{
"id": 48,
"questionText": "How is randomness introduced in Random Forest?",
"options": [
"Bootstrap sampling and random feature selection",
"Gradient descent",
"Batch normalization",
"Learning rate scheduling"
],
"correctAnswerIndex": 0,
"explanation": "Random Forest introduces randomness both in data and feature sampling."
},
{
"id": 49,
"questionText": "What type of ensemble method is Random Forest?",
"options": [
"Voting",
"Bagging",
"Boosting",
"Stacking"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest uses bagging (bootstrap aggregation) to train multiple trees."
},
{
"id": 50,
"questionText": "What is the relationship between Decision Tree depth and overfitting?",
"options": [
"Deeper trees tend to overfit",
"Deeper trees always underfit",
"Depth has no effect",
"Shallow trees always overfit"
],
"correctAnswerIndex": 0,
"explanation": "Large tree depth can cause the model to memorize training data patterns."
},
{
"id": 51,
"questionText": "What happens to the Random Forest model if trees are too shallow?",
"options": [
"Model overfits",
"Training time increases",
"Variance increases",
"Model underfits"
],
"correctAnswerIndex": 3,
"explanation": "Shallow trees can't capture complex data patterns."
},
{
"id": 52,
"questionText": "Why does Random Forest not require feature scaling?",
"options": [
"It normalizes automatically",
"It splits based on thresholds, not distance",
"It uses Euclidean distance",
"It drops correlated features"
],
"correctAnswerIndex": 1,
"explanation": "Tree-based methods are invariant to feature scaling."
},
{
"id": 53,
"questionText": "What happens if all trees are trained on identical bootstrap samples?",
"options": [
"Higher accuracy",
"No effect",
"Reduced diversity",
"Faster training"
],
"correctAnswerIndex": 2,
"explanation": "Lack of randomness among trees reduces ensemble benefit."
},
{
"id": 54,
"questionText": "Which statement is TRUE about Random Forest?",
"options": [
"It removes all bias",
"It reduces bias but increases variance",
"It increases both bias and variance",
"It reduces variance but keeps bias low"
],
"correctAnswerIndex": 3,
"explanation": "Bagging in Random Forest reduces variance without significantly increasing bias."
},
{
"id": 55,
"questionText": "In Random Forest, what does 'bootstrap=True' mean?",
"options": [
"No randomness is applied",
"Each tree skips feature selection",
"All trees use the full dataset",
"Each tree is trained on a random sample with replacement"
],
"correctAnswerIndex": 3,
"explanation": "Bootstrap sampling ensures each tree sees a slightly different dataset."
},
{
"id": 56,
"questionText": "How is feature importance calculated in Random Forest?",
"options": [
"Based on learning rate",
"Using feature frequency",
"By gradient descent",
"Based on impurity reduction"
],
"correctAnswerIndex": 3,
"explanation": "It measures how much each feature decreases node impurity across all trees."
},
{
"id": 57,
"questionText": "What is a typical hyperparameter tuning technique for Random Forest?",
"options": [
"Grid Search or Random Search",
"K-means",
"Dropout",
"Gradient Descent"
],
"correctAnswerIndex": 0,
"explanation": "Both Grid and Random Search are popular for hyperparameter tuning."
},
{
"id": 58,
"questionText": "What happens if we set 'n_estimators' too high?",
"options": [
"Lower accuracy",
"Longer training time",
"Underfitting",
"Loss of randomness"
],
"correctAnswerIndex": 1,
"explanation": "Too many trees make training slow, though accuracy improvement becomes marginal."
},
{
"id": 59,
"questionText": "How is Random Forest resistant to overfitting?",
"options": [
"Using deeper trees",
"Gradient correction",
"Averaging independent trees",
"Removing bias"
],
"correctAnswerIndex": 2,
"explanation": "Averaging many independent models cancels out noise and variance."
},
{
"id": 60,
"questionText": "Which of the following best describes the Random Forest algorithm?",
"options": [
"A single large decision tree",
"Linear regression with trees",
"Stacked boosting method",
"Ensemble of decision trees trained on random subsets"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is an ensemble approach using bagging and random feature selection."
},
{
"id": 61,
"questionText": "What is the main reason Random Forest works well even with noisy data?",
"options": [
"It applies dropout regularization",
"It removes noise automatically",
"It memorizes noise across all trees",
"It averages multiple trees to smooth out noise"
],
"correctAnswerIndex": 3,
"explanation": "Averaging predictions of multiple trees reduces the impact of noise in data."
},
{
"id": 62,
"questionText": "Which technique helps Random Forest estimate generalization error without a validation set?",
"options": [
"Cross-validation only",
"Early stopping",
"Out-of-Bag (OOB) estimation",
"Dropout sampling"
],
"correctAnswerIndex": 2,
"explanation": "OOB samples are not seen during training, allowing internal error estimation."
},
{
"id": 63,
"questionText": "What is the effect of increasing 'min_samples_split' too much?",
"options": [
"Model may underfit",
"Model may overfit",
"Training crashes",
"Bias becomes zero"
],
"correctAnswerIndex": 0,
"explanation": "Larger 'min_samples_split' prevents deeper splits, reducing learning capacity."
},
{
"id": 64,
"questionText": "What is the typical output of Random Forest in binary classification?",
"options": [
"Always continuous output",
"Softmax score",
"Only probability",
"Majority class from all trees"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest uses majority voting to decide final class."
},
{
"id": 65,
"questionText": "In Random Forest, what happens if we disable bootstrap sampling?",
"options": [
"All trees become identical",
"Each tree will see full dataset",
"Training becomes impossible",
"Feature importance cannot be calculated"
],
"correctAnswerIndex": 1,
"explanation": "bootstrap=False means no sampling, trees are trained on complete dataset."
},
{
"id": 66,
"questionText": "Which Random Forest parameter controls how many features a single split considers?",
"options": [
"min_samples_split",
"max_depth",
"max_features",
"n_estimators"
],
"correctAnswerIndex": 2,
"explanation": "Randomly selecting only 'max_features' at each split ensures diversity."
},
{
"id": 67,
"questionText": "Which situation is most ideal for using Random Forest?",
"options": [
"Low-dimensional time series",
"Fully labeled image datasets only",
"Continuous text data",
"High-dimensional structured tabular data"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest is excellent for large structured numeric + categorical datasets."
},
{
"id": 68,
"questionText": "How does Random Forest improve generalization?",
"options": [
"By memorizing data patterns",
"By deep pruning all trees",
"By increasing bias",
"By reducing variance using averaging"
],
"correctAnswerIndex": 3,
"explanation": "Averaging predictions from many uncorrelated trees reduces variance."
},
{
"id": 69,
"questionText": "What is a scenario where Random Forest might perform poorly?",
"options": [
"Large tabular dataset",
"Handling missing values",
"Highly sequential time-based data",
"Text classification with manual encoding"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is not designed to understand sequential temporal dependencies."
},
{
"id": 70,
"questionText": "What is the advantage of using 'max_samples' parameter in Random Forest?",
"options": [
"It forces normalization",
"It increases tree depth",
"It controls how many samples each tree sees",
"It controls feature count"
],
"correctAnswerIndex": 2,
"explanation": "max_samples limits data per tree to improve speed and variability."
},
{
"id": 71,
"questionText": "Why is Random Forest called a 'bagging' technique?",
"options": [
"It merges deep networks",
"It sequentially boosts errors",
"It uses bootstrap sampling + aggregation",
"It stacks models layer by layer"
],
"correctAnswerIndex": 2,
"explanation": "Random Forest is based on Bagging = Bootstrap + Aggregation."
},
{
"id": 72,
"questionText": "What is the role of 'n_jobs' parameter in Random Forest?",
"options": [
"Controls parallel processing",
"Controls noise injection",
"Controls memory allocation",
"Controls feature removal"
],
"correctAnswerIndex": 0,
"explanation": "n_jobs defines how many CPU cores to use in training."
},
{
"id": 73,
"questionText": "What happens if trees in a Random Forest are highly correlated?",
"options": [
"Bias becomes zero",
"Performance decreases",
"No effect",
"Accuracy increases massively"
],
"correctAnswerIndex": 1,
"explanation": "Less diversity among trees means less benefit from ensemble averaging."
},
{
"id": 74,
"questionText": "Why is Random Forest naturally resistant to overfitting?",
"options": [
"Because it always uses shallow trees",
"Because it restricts learning",
"Because it averages predictions from multiple trees",
"Because it limits depth"
],
"correctAnswerIndex": 2,
"explanation": "Averaging predictions reduces variance and overfitting."
},
{
"id": 75,
"questionText": "What is the output of feature importance scores in Random Forest?",
"options": [
"Relative importance values per feature",
"Loss graph",
"Class probability distribution",
"Confusion matrix"
],
"correctAnswerIndex": 0,
"explanation": "Feature importance shows which features contribute most to splits."
},
{
"id": 76,
"questionText": "Which of these indicates Random Forest overfitting?",
"options": [
"High training accuracy, low test accuracy",
"Slow training time only",
"Equal train and test accuracy",
"Low training accuracy, high test accuracy"
],
"correctAnswerIndex": 0,
"explanation": "Overfitting means model fits training well but generalizes poorly."
},
{
"id": 77,
"questionText": "What is a good reason to increase 'min_samples_leaf'?",
"options": [
"To reduce bias",
"To force normalization",
"To reduce overfitting",
"To increase overfitting"
],
"correctAnswerIndex": 2,
"explanation": "Larger leaves generalize better by preventing overly specific splits."
},
{
"id": 78,
"questionText": "Which Random Forest parameter can reduce model size and computation?",
"options": [
"max_depth",
"All of the above",
"n_estimators",
"max_samples"
],
"correctAnswerIndex": 1,
"explanation": "Reducing number of trees, depth, or samples lowers computational load."
},
{
"id": 79,
"questionText": "Which part of Random Forest helps most against overfitting?",
"options": [
"Gradient correction",
"Feature normalization",
"Deep trees",
"Bagging"
],
"correctAnswerIndex": 3,
"explanation": "Bagging reduces variance by training trees independently on random subsets."
},
{
"id": 80,
"questionText": "What is the disadvantage of using very small 'max_depth' in Random Forest?",
"options": [
"Unbalanced samples",
"Memory leak",
"Overfitting",
"Underfitting"
],
"correctAnswerIndex": 3,
"explanation": "Very shallow trees cannot capture complex relationships."
},
{
"id": 81,
"questionText": "How does Random Forest handle feature correlation?",
"options": [
"It removes correlated features by default",
"It may give correlated features lower importance",
"It fails completely",
"It merges correlated features"
],
"correctAnswerIndex": 1,
"explanation": "If two features are correlated, importance may be split between them."
},
{
"id": 82,
"questionText": "What is 'Gini Importance' in Random Forest?",
"options": [
"Metric to find best cluster",
"Loss function for optimization",
"Error on OOB samples",
"Measure of how much a feature reduces impurity"
],
"correctAnswerIndex": 3,
"explanation": "It quantifies impurity reduction contributed by each feature."
},
{
"id": 83,
"questionText": "Why is Random Forest not ideal for time-series forecasting?",
"options": [
"It needs scaling",
"It ignores temporal order",
"It can't process numbers",
"It overfits too much"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest treats data as independent samples, ignoring sequence dependence."
},
{
"id": 84,
"questionText": "What is a sign that 'n_estimators' should be increased?",
"options": [
"Very fast training",
"Perfect accuracy",
"High test variance",
"Low training accuracy only"
],
"correctAnswerIndex": 2,
"explanation": "Increasing trees reduces prediction variance and stabilizes model."
},
{
"id": 85,
"questionText": "What is 'entropy' used for in Random Forest?",
"options": [
"Learning rate control",
"Feature normalization",
"Pruning strategy",
"Split quality measure"
],
"correctAnswerIndex": 3,
"explanation": "Entropy and Gini are purity measures used to decide best splits."
},
{
"id": 86,
"questionText": "Which scenario may require reducing 'max_depth'?",
"options": [
"When training time is extremely short",
"When features are few",
"When training accuracy is perfect but test accuracy is low",
"When both accuracies are low"
],
"correctAnswerIndex": 2,
"explanation": "This indicates overfitting — reducing depth increases generalization."
},
{
"id": 87,
"questionText": "What is one major strength of Random Forest?",
"options": [
"Perfect for text generation",
"Robust to noise and overfitting",
"Predicts time trends",
"Always fastest model"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest is sturdy against noisy data due to ensemble averaging."
},
{
"id": 88,
"questionText": "Increasing 'min_samples_leaf' will most likely:",
"options": [
"Make model generalize better",
"Decrease bias heavily",
"Increase training variance",
"Increase memorization"
],
"correctAnswerIndex": 0,
"explanation": "Larger leaves lead to simpler splits and better generalization."
},
{
"id": 89,
"questionText": "Which metric is best for class imbalance evaluation in Random Forest?",
"options": [
"MSE",
"Recall / F1-score",
"Accuracy only",
"R-squared"
],
"correctAnswerIndex": 1,
"explanation": "F1 handles imbalanced data better by balancing precision and recall."
},
{
"id": 90,
"questionText": "What happens if 'max_features' is too high?",
"options": [
"Lower training accuracy",
"Trees become more random",
"Trees become more similar",
"OOB error becomes undefined"
],
"correctAnswerIndex": 2,
"explanation": "More features → less randomness → higher correlation between trees."
},
{
"id": 91,
"questionText": "Which combination may indicate optimal Random Forest tuning?",
"options": [
"Low accuracy on both",
"High train accuracy, high test accuracy",
"Low train accuracy, high test accuracy",
"High train accuracy, low test accuracy"
],
"correctAnswerIndex": 1,
"explanation": "This indicates low bias and low variance — a well-generalized model."
},
{
"id": 92,
"questionText": "Why doesn’t Random Forest require much hyperparameter tuning compared to other models?",
"options": [
"It ignores input data",
"It is robust to overfitting and variance",
"It always needs deep tuning",
"It cannot be tuned"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest naturally reduces variance and overfitting, making it less sensitive to hyperparameters."
},
{
"id": 93,
"questionText": "What is the effect of increasing 'n_estimators' on OOB error?",
"options": [
"OOB error is unaffected",
"OOB error fluctuates randomly",
"OOB error usually decreases and stabilizes",
"OOB error increases"
],
"correctAnswerIndex": 2,
"explanation": "More trees provide a better estimate of error and reduce variance of predictions."
},
{
"id": 94,
"questionText": "Which is true about correlated features in Random Forest?",
"options": [
"Correlation is ignored completely",
"Random Forest fails with correlation",
"Correlated features are removed automatically",
"Importance may be split among correlated features"
],
"correctAnswerIndex": 3,
"explanation": "When features are correlated, importance scores may be shared, lowering individual scores."
},
{
"id": 95,
"questionText": "Random Forest is considered a black-box model because?",
"options": [
"It outputs linear coefficients",
"It uses shallow trees only",
"It is hard to interpret individual predictions",
"It has only one tree"
],
"correctAnswerIndex": 2,
"explanation": "The ensemble of many trees makes it difficult to trace exact reasoning for predictions."
},
{
"id": 96,
"questionText": "Which is a good approach to reduce Random Forest computation on very large datasets?",
"options": [
"Remove bagging",
"Use all features",
"Increase depth",
"Reduce 'n_estimators' or use 'max_samples'"
],
"correctAnswerIndex": 3,
"explanation": "Fewer trees or smaller bootstrap samples lower computational cost."
},
{
"id": 97,
"questionText": "Why is Random Forest more stable than a single Decision Tree?",
"options": [
"Because it uses scaling",
"Because it prunes all trees heavily",
"Because it has only one tree",
"Because predictions are averaged over many trees"
],
"correctAnswerIndex": 3,
"explanation": "Averaging reduces sensitivity to noise and variance in data."
},
{
"id": 98,
"questionText": "What kind of bias-variance tradeoff does Random Forest achieve?",
"options": [
"High bias, low variance",
"Low bias, low variance",
"High bias, high variance",
"Low bias, high variance"
],
"correctAnswerIndex": 1,
"explanation": "Bagging ensures variance reduction while keeping bias relatively low."
},
{
"id": 99,
"questionText": "Which Random Forest feature allows quick insight into feature relevance?",
"options": [
"Feature importance scores",
"OOB error",
"min_samples_split",
"max_depth"
],
"correctAnswerIndex": 0,
"explanation": "These scores help identify which features are most influential in predictions."
},
{
"id": 100,
"questionText": "In Random Forest classification, which method aggregates the outputs of all trees?",
"options": [
"Gradient boosting",
"Weighted averaging",
"Softmax",
"Majority voting"
],
"correctAnswerIndex": 3,
"explanation": "Random Forest takes the class predicted by the majority of trees as the final output."
}
]
}