-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFunctionStringAssociateExtra.py
More file actions
380 lines (326 loc) · 13.3 KB
/
FunctionStringAssociateExtra.py
File metadata and controls
380 lines (326 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
import idaapi
import ida_auto
import ida_kernwin
import re
import string
import time
import unicodedata
from PySide6 import QtWidgets
from ida_domain import Database
from ida_domain.operands import ImmediateOperand
MAX_LINE_STRING_COUNT = 10
MAX_LABEL_STRING = 60
MAX_COMMENT_SIZE = 764
MIN_STRING_SIZE = 4
PLUGIN_NAME = "Function String Associate Extra"
PLUGIN_HOTKEY = ""
g_replace_comments = False
def filter_whitespace(input_string: str) -> str:
"""
Replaces all non-printable ASCII characters with a space and trims result.
"""
return "".join(ch if " " <= ch <= "~" else " " for ch in input_string).strip()
def safe_func_name(name):
clean = []
for ch in name:
if (
ch.isalnum()
or ch in ['_', ':', '~']
or unicodedata.category(ch)[0] in {'L', 'N'}
):
clean.append(ch)
else:
clean.append('_')
return ''.join(clean)[:250]
def safe_comment_text(s):
return s.replace('\x00', ' ').replace('\r', ' ')[:750]
def is_pretty_printable(s: str) -> bool:
letters = sum(1 for ch in s if ch in string.ascii_letters + string.digits)
printable = sum(1 for ch in s if ch in string.printable and ch not in '\t\r\n\x0b\x0c')
l = len(s)
if l == 0 or printable == 0:
return False
return letters >= 3 and (printable / l) > 0.7
def _read_string_at(db: Database, ea: int) -> str | None:
"""Try to read a null-terminated string at ea using ida-domain Bytes handler."""
try:
if not db.is_valid_ea(ea):
return None
except Exception:
return None
try:
return db.bytes.get_string_at(ea)
except Exception:
return None
def extract_function_strings(db: Database, func) -> list:
"""Iterate instructions of func, collect pretty strings referenced by
immediate push-like operands. Priority: if a string that looks like a
C++ qualified name (contains '::') is followed by a call to a SEH/throw
helper, it is promoted to the front of the list."""
if not func or func.size() < 8:
return []
found_strings = []
found_strings_set = set()
seh_name_candidate = None
seh_name_is_priority = False
instructions = list(db.functions.get_instructions(func))
string_xrefs = [] # list of (index_in_instructions, filtered_string)
for idx, insn in enumerate(instructions):
mnem = db.instructions.get_mnemonic(insn)
if mnem != "push":
continue
op = db.instructions.get_operand(insn, 0)
if not isinstance(op, ImmediateOperand):
continue
try:
val = op.get_value()
except Exception:
continue
s_full = _read_string_at(db, val)
if not s_full or len(s_full) < MIN_STRING_SIZE:
continue
filtered = filter_whitespace(s_full)
if (len(filtered) < MIN_STRING_SIZE or
filtered in found_strings_set or
not is_pretty_printable(filtered)):
continue
string_xrefs.append((idx, filtered))
found_strings.append([filtered, 1])
found_strings_set.add(filtered)
# SEH/throw promotion: if, shortly after a string push, a call to a
# SEH/throw-like helper appears, promote that string.
for idx, string_val in string_xrefs:
for look_ahead in range(1, 5):
ahead_idx = idx + look_ahead
if ahead_idx >= len(instructions):
break
ins2 = instructions[ahead_idx]
mnem2 = db.instructions.get_mnemonic(ins2)
if mnem2 != "call":
continue
disasm = db.instructions.get_disassembly(ins2) or ""
op_call = disasm.lower()
if any(w in op_call for w in ["appunwindf", "throw", "seh", "uncaught"]):
if "::" in string_val and len(string_val) > 5:
seh_name_candidate = string_val
seh_name_is_priority = True
break
if seh_name_is_priority:
break
if seh_name_is_priority:
found_strings = [[seh_name_candidate, 1]] + [
s for s in found_strings if s[0] != seh_name_candidate
]
return found_strings
def generate_str_comment(function_strings: list) -> str:
if not function_strings:
return ""
seen = set()
unique_strings = []
for string_value, _ref_count in function_strings:
if string_value not in seen:
seen.add(string_value)
unique_strings.append(string_value)
comment_text = "#STR: "
first = True
for string_value in unique_strings:
required_size = len(string_value) + 2
available_size = MAX_COMMENT_SIZE - len(comment_text) - 1
if not first:
required_size += 2
if available_size < required_size:
break
if not first:
comment_text += ", "
comment_text += f"\"{string_value}\""
first = False
return comment_text
def update_function_comment(db: Database, func, comment_text: str) -> None:
if not comment_text:
return
comment_text = safe_comment_text(comment_text)
if not g_replace_comments:
current_comment = (
db.functions.get_comment(func, repeatable=True)
or db.functions.get_comment(func, repeatable=False)
or ""
)
if current_comment:
combined_comment = current_comment + "\n" + comment_text
else:
combined_comment = comment_text
db.functions.set_comment(func, combined_comment, repeatable=True)
else:
db.functions.set_comment(func, comment_text, repeatable=True)
def process_function_add_comments(db: Database, func) -> bool:
function_strings = extract_function_strings(db, func)
if function_strings:
comment_text = generate_str_comment(function_strings)
update_function_comment(db, func, comment_text)
return True
return False
def is_valid_ida_func_name(function_name: str) -> bool:
if len(function_name) > 128:
return False
return bool(re.match(r'^[A-Za-z_][A-Za-z0-9_]*(::[A-Za-z_][A-Za-z0-9_]*)+$', function_name))
def extract_candidate_function_names(comment_text: str) -> list:
if not comment_text:
return []
candidate_names = []
str_comment_match = re.search(r'#STR:(.+)', comment_text)
if str_comment_match:
str_content = str_comment_match.group(1)
quoted_strings = re.findall(r'"(.*?)"', str_content)
for quoted_string in quoted_strings:
if "::" in quoted_string and is_valid_ida_func_name(quoted_string.replace("~", "___")):
candidate = quoted_string.replace("~", "___")
candidate_names.append(candidate)
return candidate_names
def is_autogen_func_name(function_name: str) -> bool:
return re.fullmatch(r'(sub_|nullsub_)[0-9A-Fa-f]{6,}', function_name or "") is not None
def process_function_rename(
db: Database,
func,
existing_names: set,
rename_map=None,
rename_counter=None,
) -> str:
comment_text = db.functions.get_comment(func, repeatable=True) or ""
if not comment_text:
return 'none'
candidate_names = extract_candidate_function_names(comment_text)
if not candidate_names:
return 'none'
orig_name = candidate_names[0]
if not orig_name:
return 'none'
orig_name = orig_name[:240]
current_name = db.functions.get_name(func)
if not is_autogen_func_name(current_name):
return 'skip'
if rename_map is not None and rename_counter is not None:
count = rename_counter.get(orig_name, 0)
postfix = "" if count == 0 else f"_{count}"
new_name_try = safe_func_name(f"{orig_name}{postfix}")
while new_name_try in existing_names or new_name_try in rename_map:
count += 1
new_name_try = safe_func_name(f"{orig_name}_{count}")
rename_counter[orig_name] = count + 1
rename_map[new_name_try] = func.start_ea
if current_name == new_name_try:
return 'none'
if db.functions.set_name(func, new_name_try):
existing_names.add(new_name_try)
return 'ok'
return 'err'
if len(candidate_names) > 1:
return 'warn'
new_name = orig_name
if not new_name or new_name == current_name:
return 'none'
if new_name in existing_names:
return 'warn'
if db.functions.set_name(func, new_name):
existing_names.add(new_name)
return 'ok'
return 'err'
class ReplaceOrAppendDialog(QtWidgets.QDialog):
def __init__(self, function_count, parent=None):
super().__init__(parent)
self.setWindowTitle("Function String Associate")
self.setModal(True)
layout = QtWidgets.QVBoxLayout()
label = QtWidgets.QLabel(
f"This will process all {function_count} functions.\n\n"
"If you choose REPLACE, existing function comments will be overwritten.\n"
"If unchecked, the plugin will APPEND to existing comments.\n"
)
layout.addWidget(label)
self.checkbox = QtWidgets.QCheckBox("Replace existing comments?")
layout.addWidget(self.checkbox)
SB = QtWidgets.QDialogButtonBox.StandardButton
button_box = QtWidgets.QDialogButtonBox(
SB(SB.Ok.value | SB.Cancel.value))
button_box.accepted.connect(self.accept)
button_box.rejected.connect(self.reject)
layout.addWidget(button_box)
self.setLayout(layout)
def should_replace(self):
return self.checkbox.isChecked()
def show_qt_dialog(function_count):
dialog = ReplaceOrAppendDialog(function_count)
result = dialog.exec()
if result == QtWidgets.QDialog.DialogCode.Accepted:
return dialog.should_replace()
else:
return None
class FunctionStringAssociatePlugin(idaapi.plugin_t):
flags = idaapi.PLUGIN_UNL
comment = "Extracts strings from functions as comments then renames via #STR"
help = comment
wanted_name = PLUGIN_NAME
wanted_hotkey = PLUGIN_HOTKEY
def init(self):
print(f"[{PLUGIN_NAME}] Plugin loaded.")
return idaapi.PLUGIN_OK
def run(self, arg):
if not ida_auto.auto_is_ok():
ida_kernwin.warning("Please wait until auto-analysis completes!")
return
db = Database.open()
all_functions = list(db.functions.get_all())
total = len(all_functions)
user_choice = show_qt_dialog(total)
if user_choice is None:
return
global g_replace_comments
g_replace_comments = user_choice
comment_mode = "REPLACE" if g_replace_comments else "APPEND"
print(f"[{PLUGIN_NAME}] Starting in {comment_mode} mode.")
start_time = time.time()
ida_kernwin.show_wait_box("Function String Associate: adding comments...")
comment_count = 0
for idx, func in enumerate(all_functions):
if process_function_add_comments(db, func):
comment_count += 1
if idx % 100 == 0:
ida_kernwin.replace_wait_box(f"Processing comments: {idx+1}/{total}")
if ida_kernwin.user_cancelled():
print("[INFO] Cancelled by user.")
ida_kernwin.hide_wait_box()
return
# Prefill the set of existing names from the database so we can avoid
# collisions when generating new names during rename.
existing_names = {name for _ea, name in db.names.get_all()}
rename_stats = dict(ok=0, warn=0, skip=0, err=0)
ida_kernwin.replace_wait_box("Renaming functions...")
rename_map = {} # function_name -> function_ea
rename_counter = {} # function_name -> count
for idx, func in enumerate(all_functions):
status = process_function_rename(
db, func, existing_names,
rename_map=rename_map, rename_counter=rename_counter,
)
if status in rename_stats:
rename_stats[status] += 1
if idx % 100 == 0:
ida_kernwin.replace_wait_box(f"Renaming: {idx+1}/{total}")
if ida_kernwin.user_cancelled():
print("[INFO] Cancelled by user.")
break
ida_kernwin.hide_wait_box()
elapsed = time.time() - start_time
print(f"\n[{PLUGIN_NAME}]")
print(f"--- Summary ---")
print(f"Functions with new comments: {comment_count}")
print(f"Renamed: {rename_stats['ok']}")
print(f"Skipped (custom name): {rename_stats['skip']}")
print(f"Warnings: {rename_stats['warn']}")
print(f"Errors: {rename_stats['err']}")
print(f"Total time: {elapsed:.2f} sec")
print("------------------------\n")
idaapi.refresh_idaview_anyway()
def term(self):
print(f"[{PLUGIN_NAME}] Plugin exited.")
def PLUGIN_ENTRY():
return FunctionStringAssociatePlugin()