Spaces:
Runtime error
Runtime error
| ''' | |
| A sampler is just a list of integers listing the indexes of the | |
| inputs in a data set to sample. For reproducibility, the | |
| FixedRandomSubsetSampler uses a seeded prng to produce the same | |
| sequence always. FixedSubsetSampler is just a wrapper for an | |
| explicit list of integers. | |
| coordinate_sample solves another sampling problem: when testing | |
| convolutional outputs, we can reduce data explosion by sampling | |
| random points of the feature map rather than the entire feature map. | |
| coordinate_sample does this in a deterministic way that is also | |
| resolution-independent. | |
| ''' | |
| import numpy | |
| import random | |
| from torch.utils.data.sampler import Sampler | |
class FixedSubsetSampler(Sampler):
    """A sampler backed by an explicit, fixed sequence of data set indices.

    Iteration yields the stored indices in order.  A narrower sampler can
    be derived by naming positions within this sampler's own output.
    """

    def __init__(self, samples):
        # The sequence is stored as given; it is not copied.
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __iter__(self):
        return iter(self.samples)

    def __getitem__(self, key):
        return self.samples[key]

    def dereference(self, indices):
        '''
        Map positions within this sampler (small numbers indexing the
        sample) to the data set indices stored at those positions (larger
        numbers indexing the original full set).
        '''
        translated = []
        for position in indices:
            translated.append(self.samples[position])
        return translated

    def subset(self, new_subset):
        """Return a new FixedSubsetSampler holding only the chosen positions."""
        chosen = self.dereference(new_subset)
        return FixedSubsetSampler(chosen)
class FixedRandomSubsetSampler(FixedSubsetSampler):
    """Samples a fixed slice of a deterministically shuffled data set.

    The indices ``0 .. len(data_source) - 1`` are shuffled with a PRNG
    seeded by ``seed``, and the slice ``[start:end]`` of that shuffled
    order becomes the sample sequence.  The same arguments therefore
    always select the same indices.

    Arguments:
        data_source: the data set; must support ``len()``, and must be
            indexable if ``class_subset`` is used.
        start (optional): first position of the shuffled order to keep.
        end (optional): position of the shuffled order to stop before.
        seed (optional): seed for the shuffling PRNG (default 1).
    """

    def __init__(self, data_source, start=None, end=None, seed=1):
        rng = random.Random(seed)
        shuffled = list(range(len(data_source)))
        rng.shuffle(shuffled)
        # Kept so that class_subset can inspect the underlying items.
        self.data_source = data_source
        super(FixedRandomSubsetSampler, self).__init__(shuffled[start:end])

    def class_subset(self, class_filter):
        '''
        Returns only the subset matching the given rule.

        class_filter is either a callable applied to each data item
        (keep the item when it returns a true value), or a plain label
        that is compared for equality against item[1].
        '''
        if callable(class_filter):
            rule = class_filter
        else:
            # Any non-callable (int, numpy integer, string, ...) is
            # treated as a label to match against the item's second field.
            def rule(item):
                return item[1] == class_filter
        return self.subset([i for i, j in enumerate(self.samples)
                            if rule(self.data_source[j])])
def coordinate_sample(shape, sample_size, seeds, grid=13, seed=1, flat=False):
    '''
    Returns len(seeds) sets of sample_size points within the shape given,
    one set per entry of seeds.

    Each set is produced by a numpy RandomState seeded with the
    corresponding entry of seeds: sample_size squares of a regular
    grid-per-axis lattice are picked, and a random offset is added inside
    each square.  The picks are made in unit coordinates before scaling
    by shape, so the same relative locations are chosen at any resolution.

    Arguments:
        shape: extent of each axis of the feature map (any number of axes).
        sample_size: number of points per set; must not exceed grid.
        seeds: iterable of seeds, one per returned set.
        grid: number of lattice squares along each axis.
        seed: unused; retained only for interface compatibility.
        flat: if True, return raveled (flat) indices into shape.

    Returns an int array of shape (len(seeds), sample_size) when flat,
    otherwise (len(seeds), len(shape), sample_size).
    '''
    # Materialize so that generators and other one-shot iterables work.
    seeds = list(seeds)
    ndim = len(shape)
    if flat:
        sampind = numpy.zeros((len(seeds), sample_size), dtype=int)
    else:
        # One row per axis of shape (previously hard-coded to 2 axes).
        sampind = numpy.zeros((len(seeds), ndim, sample_size), dtype=int)
    assert sample_size <= grid, 'sample_size must not exceed grid'
    # Loop-invariant quantities.
    square_count = grid ** ndim
    lattice = (grid,) * ndim
    extent = numpy.array(shape)[:, None]
    for j, current_seed in enumerate(seeds):
        rng = numpy.random.RandomState(current_seed)
        # Draw square_count lattice squares and keep the first sample_size.
        # Taking a prefix of one fixed draw means that changing
        # sample_size yields a prefix/extension of the same points.
        # NOTE: rng.choice draws WITH replacement by default, so the same
        # square can be picked more than once.  This is left unchanged so
        # that previously generated samples stay reproducible.
        picks = rng.choice(square_count, square_count)[:sample_size]
        square = numpy.stack(numpy.unravel_index(picks, lattice))
        # Then add a random offset within each square and scale into the
        # range [0...1).  This selects the same relative locations
        # regardless of resolution.
        uniform = (square + rng.uniform(size=square.shape)) / grid
        # TODO: support affine scaling so that we can align receptive field
        # centers exactly when sampling neurons in different layers.
        coords = (uniform * extent).astype(int)
        if flat:
            sampind[j] = numpy.ravel_multi_index(coords, dims=shape)
        else:
            sampind[j] = coords
    return sampind
if __name__ == '__main__':
    # Self-test, run only when this file is executed as a script.
    from numpy.testing import assert_almost_equal
    # Test that coordinate_sample is deterministic, in-range, and scalable.
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102)),
            [[[14,  0, 12, 11,  8, 13, 11, 20,  7, 20],
              [ 9, 22,  7, 11, 23, 18, 21, 15,  2,  5]]])
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 102)),
            [[[ 7,  0,  6,  5,  4,  6,  5, 10,  3, 20 // 2],
              [ 4, 11,  3,  5, 11,  9, 10,  7,  1,  5 // 2]]])
    assert_almost_equal(coordinate_sample((13, 13), 10, range(100, 102),
        flat=True),
            [[  8,  24,  67, 103,  87,  79, 138,  94,  98,  53],
             [ 95,  11,  81,  70,  63,  87,  75, 137,  40, 2+10*13]])
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 103),
        flat=True),
            [[ 95,  11,  81,  70,  63,  87,  75, 137,  40, 132],
             [  0,  78, 114, 111,  66,  45,  72,  73,  79, 135]])
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102),
        flat=True),
            [[373,  22, 319, 297, 231, 356, 307, 535, 184, 5+20*26]])
    # Test FixedRandomSubsetSampler.
    # NOTE(review): the exact shuffled order appears to depend on the
    # interpreter's random.shuffle algorithm, so these checks are written
    # against the order actually produced instead of a hard-coded list.
    fss = FixedRandomSubsetSampler(range(10))
    assert len(fss) == 10
    order = list(fss)
    # The full sampler is a permutation of every index ...
    assert sorted(order) == list(range(10))
    # ... and is reproducible for a fixed seed.
    assert list(FixedRandomSubsetSampler(range(10))) == order
    fss = FixedRandomSubsetSampler(range(10), 3, 8)
    assert len(fss) == 5
    assert list(fss) == order[3:8]
    # Class filtering is a method on the sampler, not a constructor argument.
    fss = FixedRandomSubsetSampler(
        [(i, i % 3) for i in range(10)]).class_subset(class_filter=1)
    assert len(fss) == 3
    assert list(fss) == [i for i in order if i % 3 == 1]