@@ -6,7 +6,7 @@ All main functionality is given by the function [`random_sequence`](@ref).
6
6
"""
7
7
module MotifSequenceGenerator
8
8
9
- using Combinatorics, Random
9
+ using Combinatorics, Random, StatsBase
10
10
11
11
export random_sequence, all_possible_sums
12
12
@@ -23,11 +23,12 @@ Base.showerror(io::IO, e::DeadEndMotifs) = print(io,
23
23
The algorithm works as follows: First a random sequence of motifs is created,
24
24
so that it has length of `q - δq ≤ ℓ ≤ q - δq + maximum(motiflengths)`.
25
25
The possible tries of random sequences is set by the `tries` keyword (default `5`).
26
+ The sequence is optionally sampled given a probability vector.
26
27
27
- For each random try, it is first check whether the sequence is already correct.
28
+ For each random try, it is first checked whether the sequence is already correct.
28
29
If not, the last entry of the sequence is dropped. Then, since the sequence is now
29
30
already smaller than `q`, all possible sums of `summands` out of the motif pool
30
- are checked. If some combination of `summands` sums to exactly the difference,
31
+ are checked. If some combination of `summands` sums to the difference,
31
32
they are added to the sequence.
32
33
For multiple satisfactory combinations, a random one is picked.
33
34
@@ -50,7 +51,9 @@ a proper one, an error is thrown.
50
51
Create a random sequence of motifs of type `M`, under the constraint that the
51
52
sequence has "length" `ℓ` **exactly** within `q - δq ≤ ℓ ≤ q + δq`.
52
53
Return the sequence itself as well as the
53
- sequence of indices of `motifs` used to create it.
54
+ sequence of indices of `motifs` used to create it. A vector of probabilities `weights`
55
+ can be given as a keyword argument, which then dictates the sampling probability
56
+ for each entry of `motifs` for the initial sequence created.
54
57
55
58
"length" here means an abstracted length defined by the struct `M`,
56
59
based on the `limits` and `translate` functions.
@@ -64,7 +67,7 @@ It does **not** refer to the amount of elements!
64
67
motif which is translated by `t` (either negative or positive), with
65
68
respect to the same units as `q`.
66
69
67
- ## Keywords
70
+ ## Other Keywords
68
71
Please see the source code (use `@which`) for a full description of the algorithm.
69
72
70
73
* `tries = 5` : Up to how many initial random sequences are accepted.
@@ -74,7 +77,10 @@ Please see the source code (use `@which`) for a full description of the algorith
74
77
"""
75
78
function random_sequence (motifs:: Vector{M} , q,
76
79
limits, translate, δq = 0 ;
77
- tries = 5 , summands = 3 , tailcut = 2 ) where {M}
80
+ tries = 5 , summands = 3 , tailcut = 2 ,
81
+ weights = ones (length (motifs))) where {M}
82
+
83
+ ws = _toweight (weights)
78
84
79
85
idxs = 1 : length (motifs)
80
86
motifs0, motiflens = _motifs_at_origin (motifs, limits, translate)
@@ -94,7 +100,7 @@ function random_sequence(motifs::Vector{M}, q,
94
100
while worked == false
95
101
count > tries && throw (DeadEndMotifs (tries, summands, tailcut))
96
102
97
- seq, seq_length = _random_sequence_try (motiflens, q, δq)
103
+ seq, seq_length = _random_sequence_try (motiflens, q, δq, ws )
98
104
99
105
worked = _complete_sequence! (seq, motiflens, q, δq, summands, tailcut)
100
106
@@ -104,6 +110,8 @@ function random_sequence(motifs::Vector{M}, q,
104
110
return _instantiate_sequence (motifs0, motiflens, seq, translate), seq
105
111
end
106
112
113
+ _toweight (a) = (s = sum (a); ProbabilityWeights (a./ s, 1 ))
114
+
107
115
"""
108
116
_motifs_at_origin(motifs, limits, translate) -> (motifs0, motiflens)
109
117
Bring all motifs to the origin and compute the motif lengths.
@@ -121,15 +129,15 @@ function _motifs_at_origin(motifs::Vector{M}, limits, translate) where M
121
129
end
122
130
123
131
"""
124
- _random_sequence_try(motiflens, q) -> seq, seq_length
132
+ _random_sequence_try(motiflens, q, δq [, ws] ) -> seq, seq_length
125
133
Return a random sequence of motif indices
126
134
so that the total sequence is *guaranteed* to have total length of
127
135
`q - δq ≤ ℓ ≤ q - δq + maximum(motiflens)`.
128
136
"""
129
- function _random_sequence_try (motiflens, q, δq)
137
+ function _random_sequence_try (motiflens, q, δq, ws = defaultweights (motiflens) )
130
138
seq = Int[]; seq_length = 0 ; idxs = 1 : length (motiflens)
131
139
while seq_length < q - δq
132
- i = rand (idxs)
140
+ i = sample (idxs, ws )
133
141
push! (seq, i)
134
142
seq_length += motiflens[i]
135
143
end
@@ -174,13 +182,6 @@ function _complete_sequence_remainder!(seq, motiflens, q, δq, summands, tailcut
174
182
pop! (seq)
175
183
isempty (seq) && return false
176
184
177
- # I Think the following if is unecessary?...
178
- # if q - δq - sum(motiflens[k] for k in seq) < 0
179
- # ok = _complete_sequence_extra!(seq, motiflens, q, δq)
180
- # isempty(seq) && return false
181
- # ok && return true
182
- # end
183
-
184
185
# At this point ℓ is guaranteed less than q - δq
185
186
remainder = q - δq - sum (motiflens[k] for k in seq)
186
187
@assert remainder > 0
0 commit comments