Skip to content

Commit 7554695

Browse files
authored
Add Longest Common Subsequence in M4 (#5250)
1 parent 3e55702 commit 7554695

1 file changed

Lines changed: 209 additions & 0 deletions

File tree

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
dnl Switch to the discard diversion so that the macro definitions that follow
dnl (up to the later divert(0)) do not emit any text of their own.
divert(-1)
2+
dnl show_usage()
dnl   Emit the usage message on a line of its own, then terminate m4 with
dnl   exit status 1.
define(`show_usage',
`Usage: please provide two lists in the format "1, 2, 3, 4, 5"
m4exit(1)')
5+
6+
dnl Reference: https://www.gnu.org/software/m4/manual/m4.html#index-array
dnl array_get(var_name, idx)
dnl   Expands to the quoted definition of the macro named "var_name[idx]",
dnl   which is the name array_set stores the element under.
define(`array_get', `defn(`$1[$2]')')
9+
10+
dnl array_set(var_name, idx, value)
dnl   Define the macro named "var_name[idx]" as value. Expands to nothing.
define(`array_set', `define(`$1[$2]', `$3')')
12+
13+
dnl 2D versions of "array_get" and "array_set"
dnl array2_get(varname, idx1, idx2)
dnl   Expands to the quoted definition of the macro named
dnl   "varname[idx1][idx2]".
define(`array2_get', `defn(`$1[$2][$3]')')
16+
17+
dnl array2_set(varname, idx1, idx2, value)
dnl   Define the macro named "varname[idx1][idx2]" as value. Expands to
dnl   nothing.
define(`array2_set', `define(`$1[$2][$3]', `$4')')
19+
20+
dnl array2_append(varname, idx1, value):
dnl   Append value to the row varname[idx1]:
dnl       varname[idx1][varname[idx1]["length"]] = value
dnl       varname[idx1]["length"] = varname[idx1]["length"] + 1
dnl   The row length is read once into a temporary macro that is popped again
dnl   before returning. Expands to nothing.
define(`array2_append',
`pushdef(`_a2_len', array2_get(`$1', `$2', `length'))dnl
array2_set(`$1', `$2', _a2_len, `$3')dnl
array2_set(`$1', `$2', `length', incr(_a2_len))dnl
popdef(`_a2_len')')
28+
29+
dnl is_valid(n)
dnl   Expands to 1 when n matches the integer pattern below (an optional "-"
dnl   followed by one or more digits, with optional surrounding whitespace),
dnl   and to 0 when regexp reports no match (-1).
define(`is_valid', `ifelse(regexp(`$1', `^\s*-?[0-9]+\s*$'), `-1', `0', `1')')
31+
32+
dnl parse_int_list(varname, args):
dnl   Reset varname to an empty list, then store the elements of args in order.
dnl   Expands to 1 when every element passes is_valid and to 0 as soon as one
dnl   does not (elements stored before the rejected one remain in varname).
dnl
dnl       varname[length] = 0
dnl       foreach arg in args:
dnl           if not is_valid(arg):
dnl               Return 0
dnl           varname[varname[length]] = arg
dnl           varname[length] = varname[length] + 1
dnl       Return 1
dnl
dnl   args is substituted unquoted on purpose: its commas are what split it
dnl   into the separate arguments handed to _parse_int_list.
define(`parse_int_list',
`array_set(`$1', `length', `0')`'_parse_int_list(`$1', $2)')
44+
dnl _parse_int_list(varname, arg1, arg2, ...)
dnl   Recursive worker for parse_int_list. Validates arg1 with is_valid; on
dnl   failure expands to 0 and stops. Otherwise appends arg1 to varname,
dnl   increments varname[length] and recurses on the remaining arguments,
dnl   expanding to 1 once the last one has been stored.
dnl
dnl   The element is stored in canonical form: surrounding whitespace and
dnl   redundant leading zeros are removed. is_valid accepts inputs such as
dnl   "08", "010" or "1 ", but eval treats a leading 0 as the octal prefix, so
dnl   a raw "08" is a bad expression and a raw "010" equals 8 when elements
dnl   are compared with eval later on. Trailing whitespace would also show up
dnl   in the printed result.
define(`_parse_int_list',
`ifelse(is_valid(`$2'), 0, `0',
`array_set(`$1', array_get(`$1', `length'),
  patsubst(`$2', `^\s*\(-?\)0*\([0-9]+\)\s*$', `\1\2'))dnl
array_set(`$1', `length', incr(array_get(`$1', `length')))dnl
ifelse(eval($# > 2), 1, `_parse_int_list(`$1', shift(shift($@)))', `1')'dnl
)'dnl
)
51+
52+
dnl show_int_list(varname):
dnl   Output the elements of varname separated by ", ":
dnl       for i = 0 to varname[length] - 1:
dnl           if i > 0:
dnl               Output ", "
dnl           Output varname[i]
define(`show_int_list', `_show_int_list(`$1', `0')')
58+
dnl _show_int_list(varname, i)
dnl   Recursive worker for show_int_list: while i < varname[length], output
dnl   varname[i] (preceded by ", " when i > 0) and continue with i + 1.
define(`_show_int_list',
`ifelse(eval($2 >= array_get(`$1', `length')), 0,
`ifelse(eval($2 > 0), 0, `', `, ')array_get(`$1', `$2')`'_show_int_list(`$1', incr($2))')')
65+
66+
dnl Source: https://en.wikipedia.org/wiki/Longest_common_subsequence
dnl However, instead of storing lengths, an index to a subsequence is stored.
dnl longest_common_subsequence(list1_varname, list2_varname, c_varname, subseq_varname, result_varname):
dnl     // Initialize all subsequences to an empty sequence
dnl     m = list1_varname["length"]
dnl     n = list2_varname["length"]
dnl     for i = 0 to m:
dnl         c_varname[i][0] = 0
dnl
dnl     for j = 0 to n:
dnl         c_varname[0][j] = 0
dnl
dnl     subseq_varname[0]["length"] = 0
dnl     subseq_varname["length"] = 1
dnl
dnl     // Find the longest common subsequence using prior subsequences
dnl     for i = 1 to m:
dnl         for j = 1 to n:
dnl             // If common element found, create new subsequence based on prior
dnl             // subsequence with the common element appended
dnl             if list1_varname[i - 1] == list2_varname[j - 1]:
dnl                 c_varname[i][j] = subseq_varname["length"]
dnl                 copy_array2(subseq_varname, c_varname[i - 1][j - 1], subseq_varname["length"])
dnl                 array2_append(subseq_varname, subseq_varname["length"], list1_varname[i - 1])
dnl                 subseq_varname["length"] = subseq_varname["length"] + 1
dnl             // Else, reuse the longer of the two prior subsequences
dnl             else:
dnl                 idx1 = c_varname[i][j - 1]
dnl                 idx2 = c_varname[i - 1][j]
dnl                 if subseq_varname[idx1]["length"] > subseq_varname[idx2]["length"]:
dnl                     c_varname[i][j] = idx1
dnl                 else:
dnl                     c_varname[i][j] = idx2
dnl
dnl     // Store result
dnl     result_varname["length"] = subseq_varname[c_varname[m][n]]["length"]
dnl     for i = 0 to result_varname["length"] - 1:
dnl         result_varname[i] = subseq_varname[c_varname[m][n]][i]
dnl
dnl m and n are pushed as macros for the duration of the call, because the
dnl _lcs_* helpers read them directly, and are popped again before returning.
dnl The macro expands to nothing; the answer is left in result_varname.
define(`longest_common_subsequence',
`pushdef(`n', array_get(`$2', `length'))dnl
pushdef(`m', array_get(`$1', `length'))dnl
array2_set(`$4', `0', `length', `0')dnl
array_set(`$4', `length', `1')dnl
_lcs_init_c_col0(`$3', `0')dnl
_lcs_init_c_row0(`$3', `1')dnl
_lcs_outer(`$1', `$2', `$3', `$4', `1')dnl
_lcs_store_result(`$4', array2_get(`$3', m, n), `$5')dnl
popdef(`m')dnl
popdef(`n')dnl
')
117+
118+
dnl _lcs_init_c_col0: c_varname=$1, i=$2
dnl   Set c_varname[k][0] = 0 for every k from i up to and including m, where
dnl   m is the macro pushed by longest_common_subsequence.
define(`_lcs_init_c_col0',
`ifelse(eval($2 > m), 0,
`array2_set(`$1', `$2', `0', `0')`'_lcs_init_c_col0(`$1', incr($2))')')
126+
127+
dnl _lcs_init_c_row0: c_varname=$1, j=$2
dnl   Set c_varname[0][k] = 0 for every k from j up to and including n, where
dnl   n is the macro pushed by longest_common_subsequence.
define(`_lcs_init_c_row0',
`ifelse(eval($2 > n), 0,
`array2_set(`$1', `0', `$2', `0')`'_lcs_init_c_row0(`$1', incr($2))')')
135+
136+
dnl _lcs_outer: list1_varname=$1, list2_varname=$2, c_varname=$3, subseq_varname=$4, i=$5
dnl   Outer loop of the table fill: for each row from i up to and including m
dnl   (the macro pushed by longest_common_subsequence), run _lcs_inner
dnl   starting at column 1.
define(`_lcs_outer',
`ifelse(eval($5 > m), 0,
`_lcs_inner(`$1', `$2', `$3', `$4', `$5', `1')`'_lcs_outer(`$1', `$2', `$3', `$4', incr($5))')')
144+
145+
dnl _lcs_inner: list1_varname=$1, list2_varname=$2, c_varname=$3, subseq_varname=$4, i=$5, j=$6
dnl   Inner loop of the table fill: handles cells (i, j), (i, j + 1), ... up to
dnl   and including column n (the macro pushed by longest_common_subsequence).
dnl   - When list1[i - 1] and list2[j - 1] compare equal under eval, a new
dnl     subsequence is created at index subseq["length"]: a copy of the one
dnl     referenced by c[i - 1][j - 1] with that element appended. c[i][j] is
dnl     pointed at it and subseq["length"] is incremented.
dnl   - Otherwise c[i][j] reuses whichever of c[i][j - 1] and c[i - 1][j]
dnl     references the longer subsequence (c[i - 1][j] on a tie).
dnl   The temporaries _lcs_new, _lcs_left and _lcs_up are pushed and popped
dnl   within a single step, before the recursive call.
define(`_lcs_inner',
`ifelse(eval($6 > n), 0,
`ifelse(eval(array_get(`$1', decr($5)) != array_get(`$2', decr($6))), 0,
`pushdef(`_lcs_new', array_get(`$4', `length'))dnl
array2_set(`$3', `$5', `$6', _lcs_new)dnl
copy_array2(`$4', array2_get(`$3', decr($5), decr($6)), _lcs_new)dnl
array2_append(`$4', _lcs_new, array_get(`$1', decr($5)))dnl
array_set(`$4', `length', incr(_lcs_new))dnl
popdef(`_lcs_new')',
`pushdef(`_lcs_left', array2_get(`$3', `$5', decr($6)))dnl
pushdef(`_lcs_up', array2_get(`$3', decr($5), `$6'))dnl
array2_set(`$3', `$5', `$6', ifelse(eval(array2_get(`$4', _lcs_left, `length') > array2_get(`$4', _lcs_up, `length')), 1, `_lcs_left', `_lcs_up'))dnl
popdef(`_lcs_up')dnl
popdef(`_lcs_left')')`'dnl
_lcs_inner(`$1', `$2', `$3', `$4', `$5', incr($6))')')
167+
168+
dnl copy_array2(arr2_varname, src_idx, dest_idx):
dnl   Copy row src_idx of arr2_varname onto row dest_idx, length first:
dnl       arr2_varname[dest_idx]["length"] = arr2_varname[src_idx]["length"]
dnl       for i = 0 to arr2_varname[src_idx]["length"] - 1
dnl           arr2_varname[dest_idx][i] = arr2_varname[src_idx][i]
dnl   Expands to nothing.
define(`copy_array2',
`array2_set(`$1', `$3', `length', array2_get(`$1', `$2', `length'))`'_copy_array2(`$1', `$2', `$3', `0')')
177+
178+
dnl _copy_array2: arr2_varname=$1, src_idx=$2, dest_idx=$3, i=$4
dnl   Recursive worker for copy_array2: while i is below the length of row
dnl   src_idx, copy element i into row dest_idx and continue with i + 1.
define(`_copy_array2',
`ifelse(eval($4 >= array2_get(`$1', `$2', `length')), 0,
`array2_set(`$1', `$3', `$4', array2_get(`$1', `$2', `$4'))`'_copy_array2(`$1', `$2', `$3', incr($4))')')
186+
187+
dnl _lcs_store_result: subseq_varname=$1, src_idx=$2, result_varname=$3
dnl   Copy the subsequence stored in row src_idx of subseq_varname into the
dnl   one-dimensional list result_varname: its length first, then each
dnl   element through _lcs_store_result_inner. Expands to nothing.
define(`_lcs_store_result',
`array_set(`$3', `length', array2_get(`$1', `$2', `length'))`'_lcs_store_result_inner(`$1', `$2', `$3', `0')')
193+
194+
dnl _lcs_store_result_inner: subseq_varname=$1, src_idx=$2, result_varname=$3, i=$4
dnl   Recursive worker for _lcs_store_result: while i is below
dnl   result_varname[length], set result_varname[i] to
dnl   subseq_varname[src_idx][i] and continue with i + 1.
define(`_lcs_store_result_inner',
`ifelse(eval($4 >= array_get(`$3', `length')), 0,
`array_set(`$3', `$4', array2_get(`$1', `$2', `$4'))`'_lcs_store_result_inner(`$1', `$2', `$3', incr($4))')')
202+
203+
dnl Entry point. Output is switched back on, both lists are parsed into
dnl "list1" and "list2", and the usage message is shown (which exits) unless
dnl ARGC is at least 2 and both lists parse. Otherwise the longest common
dnl subsequence is computed into "result" and printed on one line.
dnl NOTE(review): ARGC, ARGV1 and ARGV2 are not defined in this file; they are
dnl presumably supplied by the caller (e.g. with -D) - confirm with the runner.
divert(0)dnl
ifelse(eval(ARGC >= 2 && parse_int_list(`list1', ARGV1) && parse_int_list(`list2', ARGV2)),
  0, `show_usage()')dnl
longest_common_subsequence(`list1', `list2', `c', `subseq', `result')dnl
show_int_list(`result')

0 commit comments

Comments
 (0)