You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
643 lines
19 KiB
643 lines
19 KiB
#!/usr/bin/env python3
"""
c_util - C code navigation utility

Commands:
    toc                   - Show table of contents for all files
    func <file> <name>    - Show function code
    struct <file> <name>  - Show struct definition
    enum <file> <name>    - Show enum definition
"""
|
|
|
import re
import sys
|
|
|
# Reserved words of the C language (C89, C99 and C11).  A word listed here
# can never be the name of a function.
C_KEYWORDS = {
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
    'inline', 'int', 'long', 'register', 'restrict', 'return', 'short',
    'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
    'unsigned', 'void', 'volatile', 'while',
    '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex', '_Generic',
    '_Imaginary', '_Noreturn', '_Static_assert', '_Thread_local',
}

# Statement keywords that are commonly followed by '(' and must therefore
# not be mistaken for a function name.  All of them are also in C_KEYWORDS.
CONTROL_FLOW = {'if', 'for', 'while', 'switch', 'return', 'else'}
|
|
|
|
|
def read_filelist(filename='filelist.txt'):
    """Return the non-blank lines of *filename*, each stripped of whitespace.

    Every returned entry is treated by the caller as the path of one file to
    process.  Raises OSError if *filename* cannot be opened.
    """
    with open(filename, 'r') as f:
        # Strip once, then drop the entries that turned out to be empty.
        return [entry for entry in (raw.strip() for raw in f) if entry]
|
|
|
|
|
def load_files(files):
    """Read every path in *files* and return ``{path: list_of_lines}``.

    Lines are stored without their line terminators.  A file that cannot be
    read is reported on stderr and left out of the result, so that one bad
    entry does not abort the whole run.
    """
    contents = {}
    for path in files:
        try:
            with open(path, 'r') as fd:
                contents[path] = fd.read().splitlines()
        except (OSError, ValueError) as e:
            # OSError: missing or unreadable file.  ValueError covers
            # UnicodeDecodeError and paths containing a NUL byte.  Anything
            # else is a programming error and should not be hidden.
            print(f"Warning: could not read {path}: {e}", file=sys.stderr)
    return contents
|
|
|
|
|
def _strip_comments(line, in_block):
    """Remove C comments from one physical line.

    *in_block* says whether the line starts inside a ``/* ... */`` comment.

    Returns ``(code, saw_comment, in_block)``:
    - code: the line without comment text (each block comment becomes one
      space, a ``//`` comment is cut off);
    - saw_comment: True if any part of the line was comment text;
    - in_block: True if the line ends inside an unterminated block comment.

    Comment markers inside string or character literals are left alone.
    """
    code = []
    saw_comment = in_block
    quote = None  # the quote character of the literal we are inside, if any
    pos = 0
    end = len(line)

    while pos < end:
        if in_block:
            close = line.find('*/', pos)
            if close == -1:
                # The comment runs past the end of this line.
                break
            pos = close + 2
            in_block = False
            code.append(' ')
            continue

        ch = line[pos]

        if quote:
            code.append(ch)
            if ch == '\\' and pos + 1 < end:
                # Keep the escaped character so \" does not end the literal.
                code.append(line[pos + 1])
                pos += 2
                continue
            if ch == quote:
                quote = None
            pos += 1
            continue

        if ch == '"' or ch == "'":
            quote = ch
        elif line.startswith('/*', pos):
            saw_comment = True
            in_block = True
            pos += 2
            continue
        elif line.startswith('//', pos):
            saw_comment = True
            break

        code.append(ch)
        pos += 1

    return ''.join(code), saw_comment, in_block


def preprocess_lines(lines):
    """
    Classify every line of a C source file as code or non-code.

    - Preprocessor directives (#...) are non-code, and so are the lines that
      continue a directive after a trailing backslash.
    - Comments (/* */ and //) are removed; a line holding nothing but
      comment text is non-code.
    - Comment-only lines are collected and handed to the next code line as
      its pre_lines (comments before declarations).
    - Blank lines are non-code.

    Returns a list with one tuple per input line: (line, is_code, pre_lines).
    For non-code lines, line is the input unchanged and pre_lines is empty.
    For code lines, line is the input with any comment text removed, so that
    later parsing never sees braces or semicolons that belong to a comment.
    """
    result = []
    in_block_comment = False
    in_directive = False  # inside a backslash-continued preprocessor line
    pre_lines = []

    for line in lines:
        code, saw_comment, in_block_comment = _strip_comments(
            line, in_block_comment)
        code_stripped = code.strip()

        # Continuation of a multi-line macro: part of the directive.
        if in_directive:
            in_directive = code_stripped.endswith('\\')
            result.append((line, False, []))
            continue

        # Preprocessor directive - not code, and not a leading comment.
        if code_stripped.startswith('#'):
            in_directive = code_stripped.endswith('\\')
            result.append((line, False, []))
            continue

        # Nothing left after removing comments: a comment or a blank line.
        if not code_stripped:
            if saw_comment:
                pre_lines.append(line)
            result.append((line, False, []))
            continue

        # Actual code: it takes ownership of the comments collected so far.
        text = code.rstrip() if saw_comment else line
        result.append((text, True, pre_lines))
        pre_lines = []

    return result
|
|
|
|
|
def compute_brace_levels(lines_info):
    """
    Compute brace nesting level for each line.

    *lines_info* is a list of (line, is_code, pre_lines) tuples.

    Returns a list with one entry per line: the nesting level in effect
    before that line is processed.

    Rules:
    - Non-code lines do not change the level.
    - Preprocessor directives and their backslash-continued lines do not
      change the level, whatever their is_code flag says; the braces of a
      multi-line macro body are not nesting of the file itself.
    - A line containing ``extern "C"`` is skipped, so a C++ linkage block
      does not push its contents one level down.
    - All other braces are counted, including those of do/while loops: they
      are balanced, so counting both sides keeps the level correct whatever
      the loop condition is.
    - The level never drops below zero.  This absorbs the closing brace of
      an ``extern "C" {`` block and any other unmatched closer.
    """
    levels = []
    depth = 0
    in_macro = False  # inside a backslash-continued preprocessor directive

    for line, is_code, _ in lines_info:
        levels.append(depth)
        stripped = line.strip()

        if in_macro:
            in_macro = stripped.endswith('\\')
            continue

        if stripped.startswith('#'):
            in_macro = stripped.endswith('\\')
            continue

        if not is_code:
            continue

        # The opening brace of a linkage block is not real nesting.
        if 'extern "C"' in stripped:
            continue

        depth = max(0, depth + line.count('{') - line.count('}'))

    return levels
|
|
|
|
|
def extract_identifier_before_paren(line):
    """
    Extract function name before first '('.

    The text before the parenthesis is split on whitespace; the last word is
    the candidate name and everything before it is the return type.  Pointer
    stars glued to the name belong to the return type, so ``int *f(`` and
    ``int*f(`` both give ``('f', 'int *')`` / ``('f', 'int*')``.

    Returns (name, return_type), or (None, None) if there is no '(' after
    some text, if the candidate is not a valid identifier, or if it is a C
    keyword.  When nothing precedes the name, return_type is 'void'.
    """
    paren_pos = line.find('(')
    if paren_pos <= 0:
        return None, None

    # e.g. "static int * func_name" -> ["static", "int", "*", "func_name"]
    parts = line[:paren_pos].split()
    if not parts:
        return None, None

    type_parts = parts[:-1]

    # Split "**name" or "int*name" into its pointer prefix and the name.
    prefix, star, name = parts[-1].rpartition('*')
    if star:
        type_parts.append(prefix + star)

    if not name.isidentifier():
        return None, None

    if name in C_KEYWORDS or name in CONTROL_FLOW:
        return None, None

    ret_type = ' '.join(type_parts) if type_parts else 'void'
    return name, ret_type
|
|
|
|
|
def find_matching_brace(lines_info, start_idx, start_level):
    """
    Find line index of matching closing brace.

    Scanning starts at *start_idx*, which is the first line after the one
    holding the opening brace.  *start_level* is the nesting level assigned
    to that position.  Braces are followed character by character, and the
    line holding the first '}' that would take the level below *start_level*
    is returned.  An empty body, an initializer such as ``{1, 2}`` and a
    ``} while (cond);`` line are therefore all handled correctly.

    Non-code lines, preprocessor directives and the backslash-continued
    lines of a directive are ignored.

    Returns the index of the last line (``len(lines_info) - 1``) if no
    matching brace is found.
    """
    depth = start_level
    in_macro = False  # inside a backslash-continued preprocessor directive

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        stripped = line.strip()

        if in_macro:
            in_macro = stripped.endswith('\\')
            continue

        if stripped.startswith('#'):
            in_macro = stripped.endswith('\\')
            continue

        if not is_code:
            continue

        for ch in line:
            if ch == '{':
                depth += 1
            elif ch == '}':
                if depth == start_level:
                    return i
                depth -= 1

    return len(lines_info) - 1
|
|
|
|
|
def extract_declarations(lines):
    """
    Extract all function, struct, and enum declarations.

    *lines* is the content of one C file as a list of strings.  Only code
    lines at brace level 0 are considered as the start of a declaration.

    Returns (functions, structs, enums).  Struct and enum entries are dicts
    with the keys 'name', 'pre', 'start', 'end' and 'line_count'; function
    entries are the dicts produced by parse_function.  'start' and 'end' are
    0-based indices into *lines*.
    """
    functions = []
    structs = []
    enums = []

    lines_info = preprocess_lines(lines)
    brace_levels = compute_brace_levels(lines_info)

    def record(bucket, name, pre, start, end):
        """Append one struct/enum entry to *bucket*."""
        bucket.append({
            'name': name,
            'pre': pre,
            'start': start,
            'end': end,
            'line_count': end - start + 1,
        })

    i = 0
    n = len(lines_info)

    while i < n:
        line, is_code, pre_lines = lines_info[i]

        # Only process code at level 0
        if not is_code or brace_levels[i] != 0:
            i += 1
            continue

        stripped = line.strip()

        # typedef struct { ... } name;  /  typedef enum { ... } name;
        typedef_target = None
        if stripped.startswith('typedef struct'):
            typedef_target = (parse_typedef_struct, structs)
        elif stripped.startswith('typedef enum'):
            typedef_target = (parse_typedef_enum, enums)

        if typedef_target:
            parser, bucket = typedef_target
            name, decl_end = parser(lines_info, i)
            if name:
                record(bucket, name, pre_lines, i, decl_end)
            # The typedef is consumed whether or not a name was found.
            i = decl_end + 1
            continue

        # struct name { ... };  /  enum name { ... };
        simple_target = None
        if '{' in stripped:
            if stripped.startswith('struct '):
                simple_target = (parse_simple_struct, structs)
            elif stripped.startswith('enum '):
                simple_target = (parse_simple_enum, enums)

        if simple_target:
            parser, bucket = simple_target
            name, decl_end = parser(lines_info, i)
            if name:
                record(bucket, name, pre_lines, i, decl_end)
                i = decl_end + 1
                continue
            # No tag name: the line may be a function whose return type
            # starts with 'struct'/'enum', e.g. "struct foo *get(void) {".
            # Fall through to the function check instead of dropping it.

        # Function definitions
        if '(' in stripped:
            func_info = parse_function(lines_info, i)
            if func_info:
                functions.append(func_info)
                i = func_info['end'] + 1
                continue

        i += 1

    return functions, structs, enums
|
|
|
|
|
# A complete /* ... */ comment, a // comment, or a /* comment that is still
# open at the end of the line.
_TYPEDEF_COMMENT_RE = re.compile(r'/\*.*?\*/|//.*|/\*.*')


def _parse_typedef_block(lines_info, start_idx):
    """Find the end and the name of a ``typedef <kind> { ... } name;``.

    Code lines are read from *start_idx* on, with comment text removed,
    until the braces are balanced on a line that ends with ';'.  The name is
    the first declarator between the last '}' and that ';', without pointer
    stars, so ``} a_t, *a_ptr;`` gives ``a_t``.

    Returns (name, end_index).  name is None when the declaration has no
    braces or no name, or when its end is never found; in the last case
    end_index is the index of the last line.
    """
    code_parts = []
    depth = 0

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        # Comment text must not take part in brace or ';' detection.
        code = _TYPEDEF_COMMENT_RE.sub(' ', line).strip()
        code_parts.append(code)
        depth += code.count('{') - code.count('}')

        # All braces closed and the statement terminated: we're done.
        if depth == 0 and code.endswith(';'):
            text = ' '.join(code_parts)
            close_brace = text.rfind('}')
            if close_brace > 0:
                semi = text.find(';', close_brace)
                declarators = text[close_brace + 1:semi]
                name = declarators.split(',')[0].strip(' \t*')
                return (name or None), i
            return None, i

    return None, len(lines_info) - 1


def parse_typedef_struct(lines_info, start_idx):
    """Parse typedef struct { ... } name;

    Returns (name, end_index); name is None if no name could be extracted.
    """
    return _parse_typedef_block(lines_info, start_idx)


def parse_typedef_enum(lines_info, start_idx):
    """Parse typedef enum { ... } name;

    Returns (name, end_index); name is None if no name could be extracted.
    """
    return _parse_typedef_block(lines_info, start_idx)
|
|
|
|
|
# "struct name {" / "enum name {" at the start of a stripped line.
_STRUCT_HEAD_RE = re.compile(r'struct\s+(\w+)\s*\{')
_ENUM_HEAD_RE = re.compile(r'enum\s+(\w+)\s*\{')


def _parse_tagged_block(head_re, lines_info, start_idx):
    """Find the name and the last line of a ``<kind> name { ... };`` block.

    *head_re* must match the stripped line at *start_idx* and capture the
    tag name; if it does not, (None, start_idx) is returned.

    Brace counting starts on the first line itself, so a declaration that
    opens and closes on one line ends on that line.  Non-code lines are
    ignored.  If the closing brace is never found, the index of the last
    line is returned together with the name.
    """
    first_line = lines_info[start_idx][0]
    match = head_re.match(first_line.strip())
    if not match:
        return None, start_idx

    name = match.group(1)

    depth = first_line.count('{') - first_line.count('}')
    if depth <= 0:
        # e.g. "enum color { RED, GREEN };"
        return name, start_idx

    for i in range(start_idx + 1, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        depth += line.count('{') - line.count('}')
        if depth <= 0:
            return name, i

    return name, len(lines_info) - 1


def parse_simple_struct(lines_info, start_idx):
    """Parse struct name { ... };

    Returns (name, end_index), or (None, start_idx) if the line at
    *start_idx* does not start with ``struct name {``.
    """
    return _parse_tagged_block(_STRUCT_HEAD_RE, lines_info, start_idx)


def parse_simple_enum(lines_info, start_idx):
    """Parse enum name { ... };

    Returns (name, end_index), or (None, start_idx) if the line at
    *start_idx* does not start with ``enum name {``.
    """
    return _parse_tagged_block(_ENUM_HEAD_RE, lines_info, start_idx)
|
|
|
|
|
# A complete /* ... */ comment, a // comment, or a /* comment that is still
# open at the end of the line.
_FUNC_COMMENT_RE = re.compile(r'/\*.*?\*/|//.*|/\*.*')


def _function_code_text(line):
    """Return *line* without comment text and surrounding whitespace."""
    return _FUNC_COMMENT_RE.sub(' ', line).strip()


def parse_function(lines_info, start_idx):
    """Parse function declaration at level 0.

    The line at *start_idx* must hold the function name followed by '('.
    The opening brace of the body is looked for on that line (after the last
    ')') and then on the following three lines; a line ending in ';' before
    the brace means this is a prototype, not a definition.

    Returns None if no function definition starts here.  Otherwise returns a
    dict with the keys 'type', 'name', 'args', 'ret', 'pre', 'start', 'end'
    and 'line_count', where 'start'/'end' are line indices and 'pre' holds
    the comment lines collected before the declaration.
    """
    line, _, pre_lines = lines_info[start_idx]
    # Trailing comments must not hide a terminating ';' or add braces.
    stripped = _function_code_text(line)

    name, ret_type = extract_identifier_before_paren(stripped)
    if not name:
        return None

    # Forward declaration (ends with ;)
    if stripped.endswith(';'):
        return None

    # Locate the line that opens the body.
    body_open = None
    paren_end = stripped.rfind(')')
    if paren_end > 0 and '{' in stripped[paren_end:]:
        body_open = start_idx
    else:
        last = min(start_idx + 3, len(lines_info) - 1)
        for idx in range(start_idx + 1, last + 1):
            check_line, check_is_code, _ = lines_info[idx]
            if not check_is_code:
                continue

            check_text = _function_code_text(check_line)

            # A semicolon before any brace: prototype, not a definition.
            if check_text.endswith(';'):
                return None

            if '{' in check_text:
                body_open = idx
                break

    if body_open is None:
        # No body - probably forward declaration or macro, skip
        return None

    # Find closing brace.  A body that opens and closes on the same line
    # ("int f(void) { return 0; }") ends right there; searching the
    # following lines would run into the next function.
    open_text = _function_code_text(lines_info[body_open][0])
    if open_text.count('}') >= open_text.count('{'):
        closing = body_open
    else:
        closing = find_matching_brace(lines_info, body_open + 1, 0)

    # Collect the signature, which may span several lines.
    sig_text = ' '.join(
        _function_code_text(lines_info[idx][0])
        for idx in range(start_idx, body_open + 1)
        if lines_info[idx][1]
    )

    # The argument list ends at the ')' matching the first '(' - not at the
    # last ')' of the text, which may belong to the body.
    args = ''
    paren_start = sig_text.find('(')
    if paren_start > 0:
        depth = 0
        for pos in range(paren_start, len(sig_text)):
            ch = sig_text[pos]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    args = sig_text[paren_start + 1:pos].strip()
                    break

    return {
        'type': 'function',
        'name': name,
        'args': args,
        'ret': ret_type,
        'pre': pre_lines,
        'start': start_idx,
        'end': closing,
        'line_count': closing - start_idx + 1,
    }
|
|
|
|
|
def main():
    """Command-line entry point.

    Usage:
        c_util toc
        c_util func <file> <funcname>
        c_util struct <file> <structname>
        c_util enum <file> <enumname>

    The command and its argument count are checked before any file is read,
    so a usage error is reported without touching the file list.  The files
    to process are taken from read_filelist().

    Exits with status 0 when a requested declaration was printed and with
    status 1 on a usage error, an unreadable file list, or a declaration
    that was not found.  'toc' returns normally.
    """
    if len(sys.argv) < 2:
        print("Usage: c_util toc | func <file> <funcname> | struct <file> <structname> | enum <file> <enumname>")
        sys.exit(1)

    cmd = sys.argv[1]

    # Lookup commands: (usage message, not-found message)
    lookups = {
        'func': ("Usage: c_util func <file> <funcname>", "Function not found"),
        'struct': ("Usage: c_util struct <file> <structname>", "Struct not found"),
        'enum': ("Usage: c_util enum <file> <enumname>", "Enum not found"),
    }

    if cmd != 'toc' and cmd not in lookups:
        print("Unknown command")
        sys.exit(1)

    if cmd in lookups and len(sys.argv) < 4:
        print(lookups[cmd][0])
        sys.exit(1)

    try:
        files = read_filelist()
    except OSError as e:
        print(f"Error: could not read file list: {e}", file=sys.stderr)
        sys.exit(1)

    contents = load_files(files)

    # Parse all files: declarations[kind][path] -> list of entries
    declarations = {'func': {}, 'struct': {}, 'enum': {}}
    for path, lines in contents.items():
        funcs, structs, enums = extract_declarations(lines)
        declarations['func'][path] = funcs
        declarations['struct'][path] = structs
        declarations['enum'][path] = enums

    if cmd == 'toc':
        for path in files:
            # Files that could not be loaded were already reported.
            if path not in contents:
                continue

            print(f"File: {path}")

            for func in declarations['func'].get(path, []):
                print(f"Function: {func['ret']} {func['name']}({func['args']}) - {func['line_count']} lines")

            for s in declarations['struct'].get(path, []):
                print(f"Struct: {s['name']} - {s['line_count']} lines")

            for e in declarations['enum'].get(path, []):
                print(f"Enum: {e['name']} - {e['line_count']} lines")
        return

    # func / struct / enum: print the leading comments and the declaration.
    path = sys.argv[2]
    wanted = sys.argv[3]

    for decl in declarations[cmd].get(path, []):
        if decl['name'] == wanted:
            body = contents[path][decl['start']:decl['end'] + 1]
            print('\n'.join(decl['pre'] + body))
            sys.exit(0)

    print(lookups[cmd][1])
    sys.exit(1)
|
|
|
|
|
if __name__ == "__main__": |
|
main()
|
|
|