Normalizing coordinates & neighborhoods

2015-06-19 23:48:53 -04:00 · 2015-06-19 23:48:53 -04:00 · 75c9dc22fb
parent 0145165649
commit 75c9dc22fb
8 changed files with 236 additions and 240 deletions
--- a/src/configuration.py
+++ b/src/configuration.py
@ -1,4 +1,143 @@
 import numpy as np
 from bitarray import bitarray
 from itertools import product
 from collections import namedtuple
 class Neighborhood:
    """
    A neighborhood is a collection of cells around a given cell.
    The neighborhood is closely related to a configuration, which
    defines how a neighborhood is expected to look. One can think
    of a neighborhood as an instantiation of a given configuration,
    as it contains a focus cell and the cells that should be considered
    when determining the focus cell's next state.
    Offsets must be added separate from instantiation, since it isn't always necessary to
    perform this computation in the first place (for example, if an ALWAYS_PASS flag is passed
    as opposed to a MATCH flag).
    """
    def __init__(self, index):
        """
        Initializes the center cell.
        Offsetted cells belonging in the given neighborhood must be added separately.
        @index: Index of the focus cell; stored unchanged on the instance.
        """
        self.total = 0
        self.index = index
        self.neighbors = bitarray()
    def populate(self, plane, offsets):
        """
        Given the plane and offsets, determines the cells in the given neighborhood.
        Each offset is flattened with plane.flatten, shifted by this neighborhood's index and
        wrapped around the length of plane.bits before the plane is indexed. Any previously
        collected neighbors are discarded, and self.total is set to the number of cells collected.
        Note this is a relatively expensive operation, especially if called on every cell
        in a CAM every tick. Instead, consider using the provided class methods which
        shift through the bitarray instead of recomputing offsets.
        """
        self.neighbors = bitarray()
        for offset in offsets:
            f_index = plane.flatten(offset) + self.index
            self.neighbors.append(plane[f_index % len(plane.bits)])
        self.total = len(self.neighbors)
    @classmethod
    def get_neighborhoods(cls, plane, offsets):
        """
        Given the list of offsets, return a list of neighborhoods corresponding to every cell.
        Since offsets should generally stay fixed for each cell in a plane, we first flatten
        the coordinates (@offsets should be a list of tuples) relative to the first component
        and cycle through all cells, shifting every flattened offset by the cell's flat index
        and wrapping around the end of plane.bits. The plane itself is never modified.
        Returns one neighborhood per entry of plane.bits (in flat order), or an empty list
        when plane.N is not positive.
        NOTE: If all you need are the total number of cells in each neighborhood, call the
        get_totals method instead, which is significantly faster.
        """
        neighborhoods = []
        if plane.N > 0:
            f_offsets = list(map(plane.flatten, offsets))
            n_bits = len(plane.bits)
            for i in range(n_bits):
                # NOTE(review): the focus index is stored unflattened here, whereas populate
                # adds self.index to a flat offset -- confirm which form callers expect.
                neighborhood = cls(plane.unflatten(i))
                for f_offset in f_offsets:
                    # Read the neighbor relative to cell i; reading must not write to the plane.
                    neighborhood.neighbors.append(plane.bits[(f_offset + i) % n_bits])
                neighborhood.total = len(neighborhood.neighbors)
                neighborhoods.append(neighborhood)
        return neighborhoods
    @classmethod
    def get_totals(cls, plane, offsets):
        """
        Returns the total number of neighbors for each cell in a plane.
        After profiling with a previous version, I found that going through each index and totaling the number
        of active states was taking much longer than I liked. Instead, we compute as many neighborhoods as possible
        simultaneously, avoiding explicit summation via the "sum" function, at least for each state separately.
        Because the states are now represented as binary values, we instead add the binary representations together.
        And since offsets are generally consistent between each invocation of the "tick" function, we can add a row
        at a time. For example, given a plane P of shape (3, 3) and the following setup:
        [[0, 1, 1, 0, 1]
        ,[1, 0, 0, 1, 1]    ALIGN    11010    SUM
        ,[0, 1, 1, 0, 0]  =========> 11000 =========> 32111
        ,[1, 0, 0, 1, 0]             10101
        ,[0, 0, 0, 0, 1]
        ]
        with focus cell (1, 1) in the middle and offsets (-1, 0), (1, 0), (-1, 1), we can align the cells according to the above.
        The resulting sum states there are 3 neighbors at (1, 1), 2 neighbors at (1, 2), and 1 neighbor at (1, 3), (1, 4), and (1, 0).
        We do this in chunks of 9, depending on the number of offsets, so no overflowing of a single column can occur.
        We can then find the total of the ith neighborhood by checking the sum of the ith index of the summation of every
        9 chunks of numbers (this is done at the Nth-1 dimension).
        @plane: The plane whose cells are counted.
        @offsets: In the one dimensional case, integer offsets added directly to each index (wrapping around
                  both ends of plane.bits); otherwise sequences whose first component selects the adjacent
                  level and whose remainder is flattened within that level.
        Returns a flat list of counts, one per cell.
        """
        n_counts = []
        # In the first dimension, we have to simply loop through and count for each bit
        if 0 < plane.N <= 1:
            n_bits = len(plane.bits)
            for i in range(n_bits):
                # Wrap so offsets reaching past the last cell continue from the first one
                n_counts.append(sum(plane.bits[(i + j) % n_bits] for j in offsets))
        else:
            for level in range(plane.shape[0]):
                # Since working in N dimensional space, we calculate the totals at a
                # rate of N-1. We do this by generalizing the above doc description, and
                # limit our focus to the offsetted subplane adjacent to the current level,
                # then slicing the returned set of bits accordingly
                neighboring = []
                for offset in offsets:
                    # NOTE(review): adj_level may fall outside [0, plane.shape[0]); this relies
                    # on plane[...] handling such levels -- confirm against Plane.__getitem__.
                    adj_level = level + offset[0]
                    sub_plane = plane[adj_level]
                    sub_index = sub_plane.flatten(offset[1:])
                    sequence = sub_plane.bits[sub_index:] + sub_plane.bits[:sub_index]
                    # The 0/1 string is deliberately read as a decimal number so that
                    # adding numbers adds the columns independently
                    neighboring.append(int(sequence.to01()))
                # Collect our totals, breaking up each set of neighborhood totals into 9
                # and then adding the resulting collection back up (note once chunks have
                # been added, we add each digit separately (the total reduced by factor of 9))
                totals = [0] * (plane.offsets[0])
                chunks = [sum(neighboring[i:i+9]) for i in range(0, len(neighboring), 9)]
                for chunk in chunks:
                    # Leading zeros are lost by int(), so pad back to one digit per cell
                    padded_chunk = map(int, str(chunk).zfill(len(totals)))
                    # Keep totals a real list: a lazy map has no len() for the next chunk
                    totals = [sum(pair) for pair in zip(totals, padded_chunk)]
                # Neighboring totals now align with original grid
                n_counts += totals
        return n_counts
 class Configuration:
    """
    Represents an expected neighborhood; to be compared to an actual neighborhood in a CAM.
    A configuration defines an expectation of a cell's neighborhood, and the cell's new state if it passes
    this expectation.
@ -17,83 +156,8 @@ one. But how do we allow two possibilities to yield a 1? We add an additional co
    Often times, a single configuration is perfectly fine, and the exact bits are irrelevant. This
    is the case for all life-like automata, for example. In this case, we create a configuration
-with the ALWAYS_PASS flag set in the given ruleset the configuration is bundled in.
+    with the always_pass flag set in the given ruleset the configuration is bundled in.
@date: June 5th, 2015
    """
 import numpy as np
 from itertools import product
 from collections import namedtuple
 class Neighborhood:
    """
    Wrapper around a single focus cell and, optionally, the cells surrounding it.
    A configuration can be seen as a template of a neighborhood, and a neighborhood as an
    instantiation of a configuration (holding concrete values rather than templated ones).
    The surrounding cells are not resolved at construction time, since that work is not
    always needed (for example, if an ALWAYS_PASS flag is passed as opposed to a MATCH flag);
    call process_offsets when they are required.
    """
    def __init__(self, flat_index, bit_index, total):
        """
        Records the focus cell's flat index, bit index and total.
        The states and indices of the surrounding cells stay None until process_offsets is called.
        """
        self.flat_index = flat_index
        self.bit_index = bit_index
        self.total = total
        self.flat_indices = None
        self.bit_indices = None
        self.states = None
    def process_offsets(self, plane, offsets):
        """
        Looks up the state of every offsetted cell and stores the results as numpy arrays.
        @offsets must be an iterable of 3-element entries (flat index, bit index, expected state);
        only the first two are used, to read plane.grid.flat[flat index][bit index].
        This is rather expensive to call on every cell in a grid, so it should be used with caution.
        It is needed whenever the individual cells matter (such as checking an exact match against
        a configuration); until it is called, states, bit_indices and flat_indices remain None.
        """
        flat_indices, bit_indices, _ = zip(*offsets)
        looked_up = [
            plane.grid.flat[flat][bit]
            for flat, bit in zip(flat_indices, bit_indices)
        ]
        self.states = np.array(looked_up)
        self.bit_indices = np.array(bit_indices)
        self.flat_indices = np.array(flat_indices)
 class Configuration:
    """
    Represents an expected neighborhood; to be compared to an actual neighborhood in a CAM.
    A configuration allows specification of a neighborhood, not the actual state of a neighborhood.
    It is merely used for reference by a ruleset, which takes in a series of configurations and
    returns the state referenced by the first configuration that passes.
    """
    # An offset contains the flat_offset, which refers to the bitarray of the plane.grid.flat that
    # a given offset is pointing to. The bit_offset refers to the index of the bitarray at the
    # given flat_offset. State describes the expected state at the given (flat_offset, bit_offset).
    Offset = namedtuple('Offset', ['flat_offset', 'bit_offset', 'state'])
    @staticmethod
    def moore(plane, value=1):
@ -113,7 +177,6 @@ class Configuration:
        return offsets
    @staticmethod
    def neumann(plane, value=1):
        """
@ -125,7 +188,7 @@ class Configuration:
        Note the center cell is excluded, so the total number of offsets are 2N.
        """
-        offsets = []
+        offsets = {}
        variant = [0] * len(plane.shape)
        for i in range(len(variant)):
            for j in [-1, 1]:
@ -135,7 +198,6 @@ class Configuration:
        return offsets
    def __init__(self, next_state, **kwargs):
        """
        @next_state: Represents the next state of a cell given a configuration passes.
@ -145,12 +207,12 @@ class Configuration:
                     referring to the offsets checked in a given neighborhood) with an expected
                     state value and a 'plane' key, corresponding to the plane in question.
        """
-        self.offsets = []
+        self.offsets = bitarray()
        self.sequence = bitarray()
        self.next_state = next_state
        if 'plane' in kwargs and 'offsets' in kwargs:
            self.extend_offsets(kwargs['plane'], kwargs['offsets'])
    def extend_offsets(self, plane, offsets):
        """
        Allow for customizing of configuration.
@ -160,9 +222,9 @@ class Configuration:
        of the value at the first coordinate.
        """
        for coor, bit in offsets.items():
-            flat_index, bit_index = plane.flatten(coor)
+            f_index = plane.flatten(coor)
-            self.offsets.append(Configuration.Offset(flat_index, bit_index, bit))
+            self.offsets.append(f_index)
-
+            self.sequence.append(bit)
    def passes(self, plane, neighborhood, vfunc, *args):
        """
@ -184,18 +246,14 @@ class Configuration:
        except TypeError:
            return (True, self.next_state)
    def matches(self, plane, neighborhood):
        """
        Determines that neighborhood matches expectation exactly.
        Note this behaves like the _tolerates method with a tolerance of 1.
        """
-        neighborhood.process_offsets(plane, self.offsets)
+        neighborhood.populate(plane, self.offsets)
-        bits = np.array([offset[2] for offset in self.offsets])
+        return not self.sequence ^ neighborhood.neighbors
        return not np.count_nonzero(bits ^ neighborhood.states)
    def tolerates(self, plane, neighborhood, tolerance):
        """
@ -204,12 +262,9 @@ class Configuration:
        We see that the percentage of actual matches are greater than or equal to the given tolerance level. If so, we
        consider this cell to be alive. Note tolerance must be a value 0 <= t <= 1.
        """
-        neighborhood.process_offsets(plane, self.offsets)
+        neighborhood.populate(plane, self.offsets)
-        bits = np.array([offset[2] for offset in self.offsets])
+        non_matches = self.sequence ^ neighborhood.neighbors
-        non_matches = np.count_nonzero(bits ^ neighborhood.states)
+        return (non_matches / len(self.sequence)) >= tolerance
        return (non_matches / len(bits)) >= tolerance
    def satisfies(self, plane, neighborhood, valid_func, *args):
        """
--- a/src/neighborhood.py
+++ b/src/neighborhood.py
@ -1,127 +0,0 @@
 """
 A neighborhood is a collection of cells around a given cell.
 The neighborhood is closely related to a configuration, which
 defines how a neighborhood is expected to look. One can think
 of a neighborhood as an instantiation of a given configuration,
 as it contains a focus cell and the cells that should be considered
 when determining the focus cell's next state.
@date: June 18, 2015
 """
 class Neighborhood:
    """
    The neighborhood is a wrapper class that stores information regarding a particular cell.
    Offsets must be added separate from instantiation, since it isn't always necessary to
    perform this computation in the first place (for example, if an ALWAYS_PASS flag is passed
    as opposed to a MATCH flag).
    """
    def __init__(self, index):
        """
        Initializes the center cell.
        Offsetted cells belonging in the given neighborhood must be added separately.
        @index: Index of the focus cell; stored unchanged on the instance.
        """
        self.total = 0
        self.index = index
        self.neighbors = []
    def populate(self, offsets, plane):
        """
        Given the plane and offsets, determines the cells in the given neighborhood.
        The neighbors are whatever indexing the plane with @offsets returns, and self.total is
        set to the number of cells returned.
        Note this is a relatively expensive operation, especially if called on every cell
        in a CAM every tick. Instead, consider using the provided class methods which
        shift through the bitarray instead of recomputing offsets.
        """
        self.neighbors = plane[offsets]
        self.total = len(self.neighbors)
    @classmethod
    def get_neighborhoods(cls, plane, offsets):
        """
        Given the list of offsets, return a list of neighborhoods corresponding to every cell.
        Since offsets should generally stay fixed for each cell in a plane, we first flatten
        the coordinates (@offsets should be a list of tuples) relative to the first component
        and cycle through all cells, shifting every flattened offset by the cell's flat index
        and wrapping around the end of plane.bits. The plane itself is never modified.
        Returns one neighborhood per entry of plane.bits (in flat order), or an empty list
        when plane.N is not positive.
        NOTE: If all you need are the total number of cells in each neighborhood, call the
        get_totals method instead, which is significantly faster.
        """
        neighborhoods = []
        if plane.N > 0:
            f_offsets = list(map(plane.flatten, offsets))
            n_bits = len(plane.bits)
            for i in range(n_bits):
                neighborhood = cls(plane.unflatten(i))
                for f_offset in f_offsets:
                    # Read the neighbor relative to cell i; reading must not write to the plane.
                    neighborhood.neighbors.append(plane.bits[(f_offset + i) % n_bits])
                neighborhood.total = len(neighborhood.neighbors)
                neighborhoods.append(neighborhood)
        return neighborhoods
    @classmethod
    def get_totals(cls, plane, offsets):
        """
        Returns the total number of neighbors for each cell in a plane.
        After profiling with a previous version, I found that going through each index and totaling the number
        of active states was taking much longer than I liked. Instead, we compute as many neighborhoods as possible
        simultaneously, avoiding explicit summation via the "sum" function, at least for each state separately.
        Because the states are now represented as binary values, we instead add the binary representations together.
        And since offsets are generally consistent between each invocation of the "tick" function, we can add a row
        at a time. For example, given a plane P of shape (3, 3) and the following setup:
        [[0, 1, 1, 0, 1]
        ,[1, 0, 0, 1, 1]    ALIGN    11010    SUM
        ,[0, 1, 1, 0, 0]  =========> 11000 =========> 32111
        ,[1, 0, 0, 1, 0]             10101
        ,[0, 0, 0, 0, 1]
        ]
        with focus cell (1, 1) in the middle and offsets (-1, 0), (1, 0), (-1, 1), we can align the cells according to the above.
        The resulting sum states there are 3 neighbors at (1, 1), 2 neighbors at (1, 2), and 1 neighbor at (1, 3), (1, 4), and (1, 0).
        We do this in chunks of 9, depending on the number of offsets, so no overflowing of a single column can occur.
        We can then find the total of the ith neighborhood by checking the sum of the ith index of the summation of every
        9 chunks of numbers (this is done at the Nth-1 dimension).
        @plane: The plane whose cells are counted.
        @offsets: In the one dimensional case, integer offsets added directly to each index (wrapping around
                  both ends of plane.bits); otherwise sequences whose first component selects the adjacent
                  level and whose remainder is flattened within that level.
        Returns a flat list of counts, one per cell.
        """
        n_counts = []
        # In the first dimension, we have to simply loop through and count for each bit
        if 0 < plane.N <= 1:
            n_bits = len(plane.bits)
            for i in range(n_bits):
                # Wrap so offsets reaching past the last cell continue from the first one
                n_counts.append(sum(plane.bits[(i + j) % n_bits] for j in offsets))
        else:
            for level in range(plane.shape[0]):
                # Since working in N dimensional space, we calculate the totals at a
                # rate of N-1. We do this by generalizing the above doc description, and
                # limit our focus to the offsetted subplane adjacent to the current level,
                # then slicing the returned set of bits accordingly
                neighboring = []
                for offset in offsets:
                    # NOTE(review): adj_level may fall outside [0, plane.shape[0]); this relies
                    # on plane[...] handling such levels -- confirm against Plane.__getitem__.
                    adj_level = level + offset[0]
                    sub_plane = plane[adj_level]
                    sub_index = sub_plane.flatten(offset[1:])
                    sequence = sub_plane.bits[sub_index:] + sub_plane.bits[:sub_index]
                    # The 0/1 string is deliberately read as a decimal number so that
                    # adding numbers adds the columns independently
                    neighboring.append(int(sequence.to01()))
                # Collect our totals, breaking up each set of neighborhood totals into 9
                # and then adding the resulting collection back up (note once chunks have
                # been added, we add each digit separately (the total reduced by factor of 9))
                totals = [0] * (plane.offsets[0])
                chunks = [sum(neighboring[i:i+9]) for i in range(0, len(neighboring), 9)]
                for chunk in chunks:
                    # Leading zeros are lost by int(), so pad back to one digit per cell
                    padded_chunk = map(int, str(chunk).zfill(len(totals)))
                    # Keep totals a real list: a lazy map has no len() for the next chunk
                    totals = [sum(pair) for pair in zip(totals, padded_chunk)]
                # Neighboring totals now align with original grid
                n_counts += totals
        return n_counts
--- a/src/plane.py
+++ b/src/plane.py
@ -1,16 +1,3 @@
 """
 Wrapper of a bitarray.
 For the sake of compactness, the use of numpy arrays have been completely abandoned as a representation
 of the data. This also allows for a bit more consistency throughout the library, where I've often used
 the flat iterator provided by numpy, and other times used the actual array.
 The use of just a bitarray also means it is significantly more compact, indexing of a plane should be
 more efficient, and the entire association between an N-1 dimensional grid with the current shape of
 the plane is no longer a concern.
@date: June 05, 2015
 """
 import random
 import operator
 import numpy as np
@ -20,12 +7,37 @@ from bitarray import bitarray
 from collections import deque
 class Coordinate:
    """
    Allow normalization between flat indices and offsets.
    A coordinate keeps both representations of the same position: `index` (the tuple form)
    and `flat` (the flattened form), converting between them through the given plane.
    """
    def __init__(self, index, plane):
        """
        Stores the position in both forms.
        @index: Either a tuple (or tuple subclass, such as a namedtuple), which is kept as
                `index` and flattened via plane.flatten, or any other value, which is kept
                as `flat` and expanded via plane.unflatten.
        @plane: Object providing the flatten and unflatten conversions.
        """
        # isinstance (rather than an exact type check) so tuple subclasses count as coordinates
        if isinstance(index, tuple):
            self.index = index
            self.flat = plane.flatten(index)
        else:
            self.flat = index
            self.index = plane.unflatten(index)
 class Plane:
    """
    Represents a cell plane, with underlying usage of bitarrays.
    The following maintains the shape of a contiguous block of memory, allowing the user to interact
    with it as if it was a multidimensional array.
    For the sake of compactness, the use of numpy arrays have been completely abandoned as a representation
    of the data. This also allows for a bit more consistency throughout the library, where I've often used
    the flat iterator provided by numpy, and other times used the actual array.
    The use of just a bitarray also means it is significantly more compact, indexing of a plane should be
    more efficient, and the entire association between an N-1 dimensional grid with the current shape of
    the plane is no longer a concern.
    """
    def __init__(self, shape, bits = None):
--- a/src/ruleset.py
+++ b/src/ruleset.py
@ -64,7 +64,19 @@ class Ruleset:
               arg should be a function returning a BOOL, which takes in a current cell's value, and the
               value of its neighbors.
        """
-        next_states = []
+
        # These are the states of configurations that pass (note if all configurations
        # fail for any state, the state remains the same)
        next_states = plane.bits.copy()
        # These are the states we attempt to apply a configuration to
        # Since totals are computed simultaneously, we save which states do not pass
        # for each configuration
        current_states = enumerate(plane.bits)
        for config in self.configurations:
            totals = Neighborhood.get_totals(plane, config.offsets)
        for index, state in enumerate(plane.bits):
--- a/tests/config_test.py
+++ b/tests/config_test.py
--- a/tests/configuration_test.py
+++ b/tests/configuration_test.py
@ -0,0 +1,54 @@
 import os, sys
 sys.path.insert(0, os.path.join('..', 'src'))
 import plane
 import numpy as np
 from neighborhood import Neighborhood
 from configuration import Configuration
 class TestConfiguration:
    """
    Checks of Configuration: sizes of the predefined offset sets and exact matching.
    """
    def setUp(self):
        """
        Builds a neighborhood at index 0 plus one plane/configuration pair in 2D and in 3D.
        """
        self.neighborhood = Neighborhood(0)
        self.plane2d = plane.Plane((100, 100))
        # Every listed 2D cell is expected to hold a 1
        expected2d = {
            (-1, -1): 1,
            (-1, 0): 1,
            (1, -1): 1,
            (0, 0): 1
        }
        self.config2d = Configuration(0, plane=self.plane2d, offsets=expected2d)
        self.plane3d = plane.Plane((100, 100, 100))
        expected3d = {
            (-1, 0, 1): 1,
            (-2, 1, 1): 1,
            (-1, 0, 0): 0
        }
        self.config3d = Configuration(1, plane=self.plane3d, offsets=expected3d)
    def test_mooreNeighborhoodOffsets(self):
        """
        Configuration.moore yields 8 offsets for the 2D plane and 26 for the 3D plane.
        """
        moore2d = Configuration.moore(self.plane2d)
        assert len(moore2d) == 8
        moore3d = Configuration.moore(self.plane3d)
        assert len(moore3d) == 26
    def test_neumannNeighborhoodOffsets(self):
        """
        Configuration.neumann yields 4 offsets for the 2D plane and 6 for the 3D plane.
        """
        neumann2d = Configuration.neumann(self.plane2d)
        assert len(neumann2d) == 4
        neumann3d = Configuration.neumann(self.plane3d)
        assert len(neumann3d) == 6
    def test_matchNeighborhood(self):
        """
        The 2D configuration only matches once the cells it expects have been set to 1.
        """
        matched_before = self.config2d.matches(self.plane2d, self.neighborhood)
        assert not matched_before
        expected_cells = [(-1, -1), (-1, 0), (1, -1), (0, 0)]
        self.plane2d[expected_cells] = 1
        matched_after = self.config2d.matches(self.plane2d, self.neighborhood)
        assert matched_after
--- a/tests/neighborhood_test.py
+++ b/tests/neighborhood_test.py
@ -8,7 +8,7 @@ import numpy as np
 from neighborhood import Neighborhood
-class TestProperties:
+class TestNeighborhood:
    """
    """
@ -17,12 +17,12 @@ class TestProperties:
        self.neigh2d = Neighborhood(0)
        self.offsets2d = [(-1, 0), (1, 0)]
        self.plane2d = plane.Plane((100, 100))
-        self.neigh2d.populate(self.offsets2d, self.plane2d)
+        self.neigh2d.populate(self.plane2d, self.offsets2d)
        self.neigh3d = Neighborhood(0)
        self.offsets3d = [(-1, 0, 0), (1, 0, 1)]
        self.plane3d = plane.Plane((100, 100, 100))
-        self.neigh3d.populate(self.offsets3d, self.plane3d)
+        self.neigh3d.populate(self.plane3d, self.offsets3d)
    def test_neighborhoodLength(self):
        """
@ -87,4 +87,3 @@ class TestProperties:
        assert np.count_nonzero(np.array(t1)) == 200
        assert np.count_nonzero(np.array(t2)) == 20000
--- a/tests/plane_test.py
+++ b/tests/plane_test.py
@ -5,7 +5,7 @@ import plane
 import numpy as np
-class TestProperties:
+class TestPlane:
    """
    """
@ -41,15 +41,6 @@ class TestProperties:
        assert len(self.plane2d.bits) == 100 * 100
        assert len(self.plane3d.bits) == 100 * 100 * 100
 class TestIndexing:
    """
    """
    def setUp(self):
        self.plane2d = plane.Plane((100, 100))
        self.plane3d = plane.Plane((100, 100, 100))
    def test_tupleAssignment(self):
        """
        Tuple Assignment.