I have a numpy array like this for example :
我有一个这样的numpy数组,例如:
array([[0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 1],
[1, 1, 1, 1, 0, 1],
[1, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 0],
[1, 1, 0, 0, 0, 1]])
I would like to find runs of consecutive pixels with value 1 in each column and set every pixel in a run to that run's length, to get this output:
我想在每列中找到值为1的连续像素,并将这些像素值设置为获得的长度,以获得此输出:
array([[0, 0, 0, 3, 0, 4],
[0, 0, 0, 3, 0, 4],
[2, 1, 1, 3, 0, 4],
[2, 0, 0, 0, 2, 4],
[0, 0, 0, 0, 2, 0],
[1, 1, 0, 0, 0, 1]])
Thanks for your help
谢谢你的帮助
2 个解决方案
#1
1
I feel there might be a shorter pandas solution if you are willing to use that library, but for now this is the best I could come up with.
如果您愿意使用该库,我觉得可能会有更短的pandas解决方案,但是现在这是我能想到的最好的。
>>> from numpy import array
>>> from itertools import chain, groupby
>>>
>>> a = array([[0, 0, 0, 1, 0, 1],
... [0, 0, 0, 1, 0, 1],
... [1, 1, 1, 1, 0, 1],
... [1, 0, 0, 0, 1, 1],
... [0, 0, 0, 0, 1, 0],
... [1, 1, 0, 0, 0, 1]])
>>>
>>> groups = (groupby(col, bool) for col in a.T)
>>> unrolled = ([(one, list(sub)) for one, sub in grp] for grp in groups)
>>> mult = ([[x*len(sub) for x in sub] if one else sub for one, sub in grp] for grp in unrolled)
>>> chained = [list(chain(*sub)) for sub in mult]
>>> result = array(chained).T
>>>
>>> result
array([[0, 0, 0, 3, 0, 4],
[0, 0, 0, 3, 0, 4],
[2, 1, 1, 3, 0, 4],
[2, 0, 0, 0, 2, 4],
[0, 0, 0, 0, 2, 0],
[1, 1, 0, 0, 0, 1]])
#2
2
Approach #1
def scaleby_grouplen(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    if idx.size == 0:
        # No element equals 1: there are no runs to index into, so the
        # lookup below would fail on an empty ``lens``.
        return np.zeros(a.shape, dtype=idx.dtype)

    start, stop = idx[::2], idx[1::2]
    lens = stop - start

    # Mark the first element of each run, then cumulative-sum the marks to
    # give every flattened position the index of the latest run started.
    id_ar = np.zeros(len(a2), dtype=int)
    id_ar[start + 1] = 1
    idx_ar = id_ar.cumsum() - 1

    # Look up the run length per position, restore the padded 2-D layout,
    # drop the padding rows and zero everything outside the runs.
    out = a * lens[idx_ar].reshape(-1, a.shape[0] + 2).T[1:-1]
    return out
Approach #2
Alternatively, use np.maximum.accumulate in place of the cumsum step -
或者,使用np.maximum.accumulate替换cumsum部分 -
def scaleby_grouplen_v2(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Variant that propagates run indices with ``np.maximum.accumulate``
    instead of a cumulative sum.

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    if idx.size == 0:
        # No element equals 1: there are no runs to index into, so the
        # lookup below would fail on an empty ``lens``.
        return np.zeros(a.shape, dtype=idx.dtype)

    start, stop = idx[::2], idx[1::2]
    lens = stop - start

    # Write each run's ordinal at the run's first element; the running
    # maximum then carries that ordinal forward until the next run begins.
    id_ar = np.zeros(len(a2), dtype=int)
    id_ar[start + 1] = np.arange(len(start))
    idx_ar = np.maximum.accumulate(id_ar)

    # Look up the run length per position, restore the padded 2-D layout,
    # drop the padding rows and zero everything outside the runs.
    out = a * lens[idx_ar].reshape(-1, a.shape[0] + 2).T[1:-1]
    return out
Approach #3
Use np.repeat to repeat each group length once per element of its group, then assign those values to the positions that equal 1 -
使用np.repeat重复组长度,从而填充 -
def scaleby_grouplen_v3(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Variant that fills the output by repeating every run length once per
    element of its run (``np.repeat``).

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    lens = idx[1::2] - idx[::2]

    # Build the result in the dtype of the lengths rather than copying the
    # input: a copy would inherit the input dtype (lengths collapse to True
    # for bool input and wrap around for small integer types) and would
    # keep non-1 input values instead of zeroing them.
    out = np.zeros(a.shape, dtype=lens.dtype)

    # Masking the transposed view visits the 1-positions column by column,
    # the same order in which the runs were found in ``a2``.
    out.T[a.T] = np.repeat(lens, lens)
    return out
Sample run -
样品运行 -
In [177]: a
Out[177]:
array([[0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 1],
[1, 1, 1, 1, 0, 1],
[1, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 0],
[1, 1, 0, 0, 0, 1]])
In [178]: scaleby_grouplen(a)
Out[178]:
array([[0, 0, 0, 3, 0, 4],
[0, 0, 0, 3, 0, 4],
[2, 1, 1, 3, 0, 4],
[2, 0, 0, 0, 2, 4],
[0, 0, 0, 0, 2, 0],
[1, 1, 0, 0, 0, 1]])
Benchmarking
Other approach(es) -
其他方法 -
from numpy import array
from itertools import chain, groupby
# @timgeb's soln
def chain_groupby(a):
    """Scale each truthy column-wise run of ``a`` by the run's length.

    Walks the columns of ``a`` one at a time, splits each column into
    consecutive groups of equal truthiness with ``itertools.groupby`` and
    multiplies every element of a truthy group by that group's length.
    Falsy groups are passed through unchanged.

    Returns the rebuilt columns as an array in the orientation of ``a``.
    """
    rebuilt_columns = []
    for column in a.T:
        scaled = []
        for is_truthy, run in groupby(column, bool):
            members = list(run)
            if is_truthy:
                run_length = len(members)
                scaled.extend(value * run_length for value in members)
            else:
                scaled.extend(members)
        rebuilt_columns.append(scaled)
    # Columns were collected as rows, so transpose back.
    return array(rebuilt_columns).T
Timings -
In [280]: np.random.seed(0)
In [281]: a = np.random.randint(0,2,(1000,1000))
In [282]: %timeit chain_groupby(a)
1 loop, best of 3: 667 ms per loop
In [283]: %timeit scaleby_grouplen(a)
100 loops, best of 3: 17.7 ms per loop
In [284]: %timeit scaleby_grouplen_v2(a)
100 loops, best of 3: 17.1 ms per loop
In [331]: %timeit scaleby_grouplen_v3(a)
100 loops, best of 3: 18.6 ms per loop
#1
1
I feel there might be a shorter pandas solution if you are willing to use that library, but for now this is the best I could come up with.
如果您愿意使用该库,我觉得可能会有更短的pandas解决方案,但是现在这是我能想到的最好的。
>>> from numpy import array
>>> from itertools import chain, groupby
>>>
>>> a = array([[0, 0, 0, 1, 0, 1],
... [0, 0, 0, 1, 0, 1],
... [1, 1, 1, 1, 0, 1],
... [1, 0, 0, 0, 1, 1],
... [0, 0, 0, 0, 1, 0],
... [1, 1, 0, 0, 0, 1]])
>>>
>>> groups = (groupby(col, bool) for col in a.T)
>>> unrolled = ([(one, list(sub)) for one, sub in grp] for grp in groups)
>>> mult = ([[x*len(sub) for x in sub] if one else sub for one, sub in grp] for grp in unrolled)
>>> chained = [list(chain(*sub)) for sub in mult]
>>> result = array(chained).T
>>>
>>> result
array([[0, 0, 0, 3, 0, 4],
[0, 0, 0, 3, 0, 4],
[2, 1, 1, 3, 0, 4],
[2, 0, 0, 0, 2, 4],
[0, 0, 0, 0, 2, 0],
[1, 1, 0, 0, 0, 1]])
#2
2
Approach #1
def scaleby_grouplen(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    if idx.size == 0:
        # No element equals 1: there are no runs to index into, so the
        # lookup below would fail on an empty ``lens``.
        return np.zeros(a.shape, dtype=idx.dtype)

    start, stop = idx[::2], idx[1::2]
    lens = stop - start

    # Mark the first element of each run, then cumulative-sum the marks to
    # give every flattened position the index of the latest run started.
    id_ar = np.zeros(len(a2), dtype=int)
    id_ar[start + 1] = 1
    idx_ar = id_ar.cumsum() - 1

    # Look up the run length per position, restore the padded 2-D layout,
    # drop the padding rows and zero everything outside the runs.
    out = a * lens[idx_ar].reshape(-1, a.shape[0] + 2).T[1:-1]
    return out
Approach #2
Alternatively, use np.maximum.accumulate in place of the cumsum step -
或者,使用np.maximum.accumulate替换cumsum部分 -
def scaleby_grouplen_v2(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Variant that propagates run indices with ``np.maximum.accumulate``
    instead of a cumulative sum.

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    if idx.size == 0:
        # No element equals 1: there are no runs to index into, so the
        # lookup below would fail on an empty ``lens``.
        return np.zeros(a.shape, dtype=idx.dtype)

    start, stop = idx[::2], idx[1::2]
    lens = stop - start

    # Write each run's ordinal at the run's first element; the running
    # maximum then carries that ordinal forward until the next run begins.
    id_ar = np.zeros(len(a2), dtype=int)
    id_ar[start + 1] = np.arange(len(start))
    idx_ar = np.maximum.accumulate(id_ar)

    # Look up the run length per position, restore the padded 2-D layout,
    # drop the padding rows and zero everything outside the runs.
    out = a * lens[idx_ar].reshape(-1, a.shape[0] + 2).T[1:-1]
    return out
Approach #3
Use np.repeat to repeat each group length once per element of its group, then assign those values to the positions that equal 1 -
使用np.repeat重复组长度,从而填充 -
def scaleby_grouplen_v3(ar):
    """Replace each vertical run of 1s in a 2-D array by the run's length.

    Variant that fills the output by repeating every run length once per
    element of its run (``np.repeat``).

    Parameters
    ----------
    ar : array_like, 2-D
        Input array. Only elements equal to 1 are treated as part of a run;
        every other element yields 0 in the output.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``ar``. Each element that was 1
        holds the length of the consecutive column-wise run it belongs to.
    """
    a = np.asarray(ar) == 1

    # Add a False row above and below so that every run starts and ends
    # inside its own column, even when it touches the top or bottom edge.
    a1 = np.pad(a, ((1, 1), (0, 0)), 'constant')

    # Column-major flattening lays the (padded) columns end to end.
    a2 = a1.ravel('F')

    # Positions where the mask flips; they alternate start, stop, start, ...
    idx = np.flatnonzero(a2[1:] != a2[:-1])
    lens = idx[1::2] - idx[::2]

    # Build the result in the dtype of the lengths rather than copying the
    # input: a copy would inherit the input dtype (lengths collapse to True
    # for bool input and wrap around for small integer types) and would
    # keep non-1 input values instead of zeroing them.
    out = np.zeros(a.shape, dtype=lens.dtype)

    # Masking the transposed view visits the 1-positions column by column,
    # the same order in which the runs were found in ``a2``.
    out.T[a.T] = np.repeat(lens, lens)
    return out
Sample run -
样品运行 -
In [177]: a
Out[177]:
array([[0, 0, 0, 1, 0, 1],
[0, 0, 0, 1, 0, 1],
[1, 1, 1, 1, 0, 1],
[1, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 0],
[1, 1, 0, 0, 0, 1]])
In [178]: scaleby_grouplen(a)
Out[178]:
array([[0, 0, 0, 3, 0, 4],
[0, 0, 0, 3, 0, 4],
[2, 1, 1, 3, 0, 4],
[2, 0, 0, 0, 2, 4],
[0, 0, 0, 0, 2, 0],
[1, 1, 0, 0, 0, 1]])
Benchmarking
Other approach(es) -
其他方法 -
from numpy import array
from itertools import chain, groupby
# @timgeb's soln
def chain_groupby(a):
    """Scale each truthy column-wise run of ``a`` by the run's length.

    Walks the columns of ``a`` one at a time, splits each column into
    consecutive groups of equal truthiness with ``itertools.groupby`` and
    multiplies every element of a truthy group by that group's length.
    Falsy groups are passed through unchanged.

    Returns the rebuilt columns as an array in the orientation of ``a``.
    """
    rebuilt_columns = []
    for column in a.T:
        scaled = []
        for is_truthy, run in groupby(column, bool):
            members = list(run)
            if is_truthy:
                run_length = len(members)
                scaled.extend(value * run_length for value in members)
            else:
                scaled.extend(members)
        rebuilt_columns.append(scaled)
    # Columns were collected as rows, so transpose back.
    return array(rebuilt_columns).T
Timings -
In [280]: np.random.seed(0)
In [281]: a = np.random.randint(0,2,(1000,1000))
In [282]: %timeit chain_groupby(a)
1 loop, best of 3: 667 ms per loop
In [283]: %timeit scaleby_grouplen(a)
100 loops, best of 3: 17.7 ms per loop
In [284]: %timeit scaleby_grouplen_v2(a)
100 loops, best of 3: 17.1 ms per loop
In [331]: %timeit scaleby_grouplen_v3(a)
100 loops, best of 3: 18.6 ms per loop