#!/usr/bin/env python3
"""
c_util - C code navigation utility
Commands:
toc - Show table of contents for all files
func <file> <name> - Show function code
struct <file> <name> - Show struct definition
enum <file> <name> - Show enum definition
"""
import sys
import re
# Reserved words of the C language (C89 through C11). An identifier that
# appears here can never be the name of a function.
C_KEYWORDS = {
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
    'inline', 'int', 'long', 'register', 'restrict', 'return', 'short',
    'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
    'unsigned', 'void', 'volatile', 'while',
    '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex', '_Generic',
    '_Imaginary', '_Noreturn', '_Static_assert', '_Thread_local',
}

# Backward-compatible alias for the earlier lowercase spelling of this set.
c_keywords = C_KEYWORDS

# Keywords that are commonly followed by '(' in statements; checked alongside
# C_KEYWORDS when deciding whether "name(" starts a function.
CONTROL_FLOW = {'if', 'for', 'while', 'switch', 'return', 'else'}
def read_filelist(filename='filelist.txt'):
    """Read the list of files to process.

    Args:
        filename: Path of a text file naming one source file per line.

    Returns:
        The non-blank lines of ``filename``, stripped of surrounding
        whitespace, in file order.

    Raises:
        OSError: If ``filename`` cannot be opened.
    """
    # 'utf-8-sig' reads plain UTF-8 as well and drops a leading byte-order
    # mark, which would otherwise become part of the first file name.
    with open(filename, 'r', encoding='utf-8-sig') as f:
        entries = (raw.strip() for raw in f)
        return [entry for entry in entries if entry]
def load_files(files):
    """Load the contents of every listed file.

    Args:
        files: Iterable of file paths.

    Returns:
        Dict mapping each readable path to its list of lines (without line
        terminators). Paths that cannot be read are reported on stderr and
        left out of the result.
    """
    contents = {}
    for path in files:
        try:
            # errors='replace' keeps a file usable when a comment or string
            # holds a byte that is not valid UTF-8; line numbering is
            # unaffected by the substitution.
            with open(path, 'r', encoding='utf-8', errors='replace') as fd:
                contents[path] = fd.read().splitlines()
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: malformed path
            # (for example an embedded NUL character).
            print(f"Warning: could not read {path}: {e}", file=sys.stderr)
    return contents
def _strip_comments(line, in_block):
    """Remove C comment text from one line.

    String literals are not interpreted, so comment markers inside a string
    are treated as comment markers.

    Args:
        line: The source line to scan.
        in_block: True when the line starts inside an unterminated
            ``/* ... */`` comment.

    Returns:
        ``(code_text, in_block)``: the line with comment text removed, and
        whether a block comment is still open at the end of the line.
    """
    kept = []
    pos = 0
    while pos < len(line):
        if in_block:
            end = line.find('*/', pos)
            if end == -1:
                break
            pos = end + 2
            in_block = False
            continue
        block_at = line.find('/*', pos)
        slash_at = line.find('//', pos)
        # A '//' that comes first hides everything after it, including '/*'.
        if slash_at != -1 and (block_at == -1 or slash_at < block_at):
            kept.append(line[pos:slash_at])
            break
        if block_at == -1:
            kept.append(line[pos:])
            break
        kept.append(line[pos:block_at])
        pos = block_at + 2
        in_block = True
    return ''.join(kept), in_block


def preprocess_lines(lines):
    """Classify every source line as code or non-code.

    Comment-only lines, blank lines and preprocessor directives (including
    their backslash-continued lines) are non-code. Comment-only lines are
    collected and handed to the next code line as its ``pre_lines``.

    Args:
        lines: List of source lines without line terminators.

    Returns:
        List of ``(original_line, is_code, pre_lines)`` tuples, one per input
        line. ``pre_lines`` is the list of comment lines seen since the
        previous code line; it is empty for non-code lines.
    """
    result = []
    multi_comment = False   # inside an unterminated /* ... */ comment
    in_directive = False    # previous directive line ended with a backslash
    pre_lines = []

    for line in lines:
        stripped = line.strip()

        # Continuation of a multi-line preprocessor directive (macro body).
        if in_directive:
            in_directive = stripped.endswith('\\')
            result.append((line, False, []))
            continue

        started_in_comment = multi_comment
        code_text, multi_comment = _strip_comments(line, multi_comment)
        code_text = code_text.strip()

        if not code_text:
            # Comment-only line (or any line inside a block comment) is kept
            # as leading documentation; a plain blank line is only a
            # separator.
            if stripped or started_in_comment:
                pre_lines.append(line)
            result.append((line, False, []))
            continue

        # Preprocessor directive: not code, and not documentation either.
        if code_text.startswith('#'):
            in_directive = stripped.endswith('\\')
            result.append((line, False, []))
            continue

        # Actual code: it takes ownership of the pending comment lines.
        result.append((line, True, pre_lines))
        pre_lines = []

    return result
def compute_brace_levels(lines_info):
    """Compute the brace nesting level in effect at the start of each line.

    Braces that only open or close an ``extern "C"`` linkage block are not
    counted, so declarations inside such a block stay at level 0. Braces of
    ``do { ... } while (...)`` loops are skipped as a pair.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.

    Returns:
        List with one level per entry of ``lines_info`` (the level before the
        line itself is processed).
    """
    do_open = re.compile(r'do\s*\{')
    do_close = re.compile(r'\}\s*while\b')
    while_zero = re.compile(r'while\s*\(\s*0\s*\)')

    levels = []
    brace_level = 0
    extern_depth = 0        # open extern "C" blocks whose '{' was not counted
    extern_pending = False  # saw a bare extern "C"; its '{' is expected next
    pending_do = 0          # skipped 'do {' openers still awaiting '} while'

    for line, is_code, _ in lines_info:
        levels.append(brace_level)
        if not is_code:
            continue
        stripped = line.strip()

        # Preprocessor lines are normally filtered out already.
        if stripped.startswith('#'):
            continue

        opens = line.count('{')
        closes = line.count('}')

        # Opening brace on the line after a bare 'extern "C"'.
        if extern_pending:
            extern_pending = False
            if stripped.startswith('{'):
                extern_depth += 1
                continue

        if 'extern "C"' in stripped:
            if opens > closes:
                extern_depth += 1
            elif stripped == 'extern "C"':
                extern_pending = True
            continue

        # A '}' at level 0 can only close an uncounted extern "C" block.
        if extern_depth and brace_level == 0 and stripped.startswith('}'):
            extern_depth -= 1
            continue

        if do_open.match(stripped):
            # Remember the opener unless the loop also closes on this line.
            if opens > closes:
                pending_do += 1
            continue
        if stripped.startswith('}'):
            # '} while (0)' ends a macro-style block, with or without spaces.
            if while_zero.search(stripped):
                pending_do = max(pending_do - 1, 0)
                continue
            # '} while (cond)' pairs with a 'do {' that was skipped earlier.
            if pending_do and do_close.match(stripped):
                pending_do -= 1
                continue

        brace_level += opens - closes

    return levels
def extract_identifier_before_paren(line):
    """Extract the function name that precedes the first '(' on a line.

    Args:
        line: A (stripped) line of C code.

    Returns:
        ``(name, return_type)``, or ``(None, None)`` when the text before
        the parenthesis does not end in a usable identifier (no '(' on the
        line, nothing before it, not an identifier, or a C keyword).
        ``return_type`` is everything before the name, including pointer
        stars written against the name; it is ``'void'`` when nothing
        precedes the name.
    """
    paren_pos = line.find('(')
    if paren_pos <= 0:
        return None, None

    # e.g. "static int *func_name" -> ["static", "int", "*func_name"]
    parts = line[:paren_pos].split()
    if not parts:
        return None, None

    last = parts[-1]
    name = last.strip('*').strip()
    if not name.isidentifier():
        return None, None
    if name in C_KEYWORDS or name in CONTROL_FLOW:
        return None, None

    # Stars glued to the name ("int *foo") belong to the return type.
    leading_stars = last[:len(last) - len(last.lstrip('*'))]
    type_parts = parts[:-1]
    if leading_stars:
        type_parts = type_parts + [leading_stars]
    ret_type = ' '.join(type_parts) if type_parts else 'void'
    return name, ret_type
def find_matching_brace(lines_info, start_idx, start_level):
    """Find the line holding the brace that closes the current block.

    Scanning starts at ``start_idx``, which is taken to be the first line
    inside the block, at nesting level ``start_level``. The match is the
    first code line after which the level falls below ``start_level``.
    Lines with balanced braces (such as ``int a[] = {1, 2};``) therefore do
    not end the block.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of the first line to scan.
        start_level: Nesting level in effect at ``start_idx``.

    Returns:
        Index of the closing line, or ``len(lines_info) - 1`` when no
        closing brace is found.
    """
    do_open = re.compile(r'do\s*\{')
    do_close = re.compile(r'\}\s*while\b')
    while_zero = re.compile(r'while\s*\(\s*0\s*\)')

    level = start_level
    pending_do = 0  # skipped 'do {' openers still awaiting their '} while'

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        stripped = line.strip()
        if stripped.startswith('#'):
            continue

        opens = line.count('{')
        closes = line.count('}')

        # do/while braces are skipped as a pair so they never change level.
        if do_open.match(stripped):
            if opens > closes:
                pending_do += 1
            continue
        if stripped.startswith('}'):
            if while_zero.search(stripped):
                pending_do = max(pending_do - 1, 0)
                continue
            if pending_do and do_close.match(stripped):
                pending_do -= 1
                continue

        level += opens - closes
        if level < start_level:
            return i

    return len(lines_info) - 1
def extract_declarations(lines):
    """Extract all top-level function, struct and enum declarations.

    Only code lines at brace level 0 are considered. Typedef'd unions are
    reported together with structs.

    Args:
        lines: List of source lines of one C file.

    Returns:
        ``(functions, structs, enums)``. Structs and enums are dicts with the
        keys ``name``, ``pre``, ``start``, ``end`` and ``line_count``;
        functions are the dicts returned by ``parse_function``. ``start``
        and ``end`` are inclusive indices into ``lines``.
    """
    functions = []
    structs = []
    enums = []

    lines_info = preprocess_lines(lines)
    brace_levels = compute_brace_levels(lines_info)

    def pick_type_parser(stripped):
        """Return (parser, target list) for a type declaration, else None."""
        if stripped.startswith(('typedef struct', 'typedef union')):
            return parse_typedef_struct, structs
        if stripped.startswith('typedef enum'):
            return parse_typedef_enum, enums
        if '{' in stripped:
            if stripped.startswith('struct '):
                return parse_simple_struct, structs
            if stripped.startswith('enum '):
                return parse_simple_enum, enums
        return None

    i = 0
    n = len(lines_info)
    while i < n:
        line, is_code, pre_lines = lines_info[i]
        # Only code at the outermost nesting level can start a declaration.
        if not is_code or brace_levels[i] != 0:
            i += 1
            continue

        stripped = line.strip()

        handler = pick_type_parser(stripped)
        if handler is not None:
            parser, target = handler
            name, decl_end = parser(lines_info, i)
            if name:
                target.append({
                    'name': name,
                    'pre': pre_lines,
                    'start': i,
                    'end': decl_end,
                    'line_count': decl_end - i + 1
                })
            # Skip the whole declaration; max() guarantees forward progress.
            i = max(decl_end, i) + 1
            continue

        if '(' in stripped:
            func_info = parse_function(lines_info, i)
            if func_info:
                functions.append(func_info)
                i = max(func_info['end'], i) + 1
                continue

        i += 1

    return functions, structs, enums


# Backward-compatible alias for the previous name of this entry point.
extract_functions_and_structs = extract_declarations
def parse_typedef_struct(lines_info, start_idx):
    """Parse ``typedef struct { ... } name;`` starting at ``start_idx``.

    Comment text is ignored when counting braces and looking for the final
    semicolon. When several declarators follow the closing brace
    (``} a_t, *a_ptr;``) the first one is reported.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of the line holding the ``typedef`` keyword.

    Returns:
        ``(name, end_idx)``. ``name`` is ``None`` when the declaration has no
        body (forward declaration or plain alias) or no name after the
        closing brace. ``end_idx`` is the line ending the declaration, or
        ``len(lines_info) - 1`` when no end was found.
    """
    code_parts = []
    brace_count = 0

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        # Drop comments so braces/semicolons inside them are not counted.
        code = re.sub(r'/\*.*?\*/', ' ', line).split('//', 1)[0].strip()
        if not code:
            continue
        code_parts.append(code)

        # do/while braces are skipped as a pair.
        is_do_open = code.startswith('do {')
        is_do_close = code.startswith('}') and 'while(0)' in code
        if not (is_do_open or is_do_close):
            brace_count += code.count('{') - code.count('}')

        # All braces closed and the statement terminated: declaration done.
        if brace_count == 0 and code.endswith(';'):
            decl_text = ' '.join(code_parts)
            close_brace = decl_text.rfind('}')
            if close_brace == -1:
                return None, i
            semi = decl_text.find(';', close_brace)
            declarators = decl_text[close_brace + 1:semi]
            name = declarators.split(',')[0].strip()
            return (name or None), i

    return None, len(lines_info) - 1
def parse_typedef_enum(lines_info, start_idx):
    """Parse ``typedef enum { ... } name;`` starting at ``start_idx``.

    Comment text is ignored when counting braces and looking for the final
    semicolon. When several declarators follow the closing brace the first
    one is reported.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of the line holding the ``typedef`` keyword.

    Returns:
        ``(name, end_idx)``. ``name`` is ``None`` when the declaration has no
        body (forward declaration or plain alias) or no name after the
        closing brace. ``end_idx`` is the line ending the declaration, or
        ``len(lines_info) - 1`` when no end was found.
    """
    code_parts = []
    brace_count = 0

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        # Drop comments so braces/semicolons inside them are not counted.
        code = re.sub(r'/\*.*?\*/', ' ', line).split('//', 1)[0].strip()
        if not code:
            continue
        code_parts.append(code)

        # do/while braces are skipped as a pair.
        is_do_open = code.startswith('do {')
        is_do_close = code.startswith('}') and 'while(0)' in code
        if not (is_do_open or is_do_close):
            brace_count += code.count('{') - code.count('}')

        # All braces closed and the statement terminated: declaration done.
        if brace_count == 0 and code.endswith(';'):
            decl_text = ' '.join(code_parts)
            close_brace = decl_text.rfind('}')
            if close_brace == -1:
                return None, i
            semi = decl_text.find(';', close_brace)
            declarators = decl_text[close_brace + 1:semi]
            name = declarators.split(',')[0].strip()
            return (name or None), i

    return None, len(lines_info) - 1
def parse_simple_struct(lines_info, start_idx):
    """Parse ``struct name { ... };`` starting at ``start_idx``.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of the line that opens the struct.

    Returns:
        ``(name, end_idx)``. ``(None, start_idx)`` when the line is not of
        the form ``struct name {`` (anonymous structs included). ``end_idx``
        is the line where the braces balance, or ``len(lines_info) - 1`` when
        they never do.
    """
    stripped = lines_info[start_idx][0].strip()

    match = re.match(r'struct\s+(\w+)\s*\{', stripped)
    if not match:
        return None, start_idx
    name = match.group(1)

    # Count what the opening line really holds: a struct written on a single
    # line is already closed here.
    brace_count = stripped.count('{') - stripped.count('}')
    if brace_count <= 0:
        return name, start_idx

    for i in range(start_idx + 1, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        stripped = line.strip()
        # do/while braces are skipped as a pair.
        if stripped.startswith('do {'):
            continue
        if stripped.startswith('}') and 'while(0)' in stripped:
            continue
        brace_count += line.count('{') - line.count('}')
        if brace_count == 0:
            return name, i

    return name, len(lines_info) - 1
def parse_simple_enum(lines_info, start_idx):
    """Parse ``enum name { ... };`` starting at ``start_idx``.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of the line that opens the enum.

    Returns:
        ``(name, end_idx)``. ``(None, start_idx)`` when the line is not of
        the form ``enum name {`` (anonymous enums included). ``end_idx`` is
        the line where the braces balance, or ``len(lines_info) - 1`` when
        they never do.
    """
    stripped = lines_info[start_idx][0].strip()

    match = re.match(r'enum\s+(\w+)\s*\{', stripped)
    if not match:
        return None, start_idx
    name = match.group(1)

    # Count what the opening line really holds: an enum written on a single
    # line is already closed here.
    brace_count = stripped.count('{') - stripped.count('}')
    if brace_count <= 0:
        return name, start_idx

    for i in range(start_idx + 1, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        stripped = line.strip()
        # do/while braces are skipped as a pair.
        if stripped.startswith('do {'):
            continue
        if stripped.startswith('}') and 'while(0)' in stripped:
            continue
        brace_count += line.count('{') - line.count('}')
        if brace_count == 0:
            return name, i

    return name, len(lines_info) - 1
def parse_function(lines_info, start_idx):
    """Parse a function definition that starts at ``start_idx``.

    The line must hold ``name(`` and the opening brace of the body must be
    on the same line or on one of the next three lines. Prototypes (a line
    ending in ';' before any '{') are rejected.

    Args:
        lines_info: List of ``(line, is_code, pre_lines)`` tuples.
        start_idx: Index of a level-0 code line containing '('.

    Returns:
        Dict with the keys ``type``, ``name``, ``args``, ``ret``, ``pre``,
        ``start``, ``end`` and ``line_count``, or ``None`` when the line does
        not start a function definition.
    """
    line, is_code, pre_lines = lines_info[start_idx]
    stripped = line.strip()

    name, ret_type = extract_identifier_before_paren(stripped)
    if not name:
        return None

    # A line ending in ';' is a prototype or a statement, not a definition.
    if stripped.endswith(';'):
        return None

    decl_end = start_idx
    found_brace = False

    # Opening brace on the signature line itself, after the last ')'.
    paren_end = stripped.rfind(')')
    if paren_end > 0 and '{' in stripped[paren_end:]:
        found_brace = True

    # Otherwise look at the next three lines, stopping at a ';'.
    if not found_brace:
        for offset in range(1, 4):
            idx = start_idx + offset
            if idx >= len(lines_info):
                break
            check_line, check_is_code, _ = lines_info[idx]
            if not check_is_code:
                continue
            if check_line.strip().endswith(';'):
                return None
            if '{' in check_line:
                decl_end = idx
                found_brace = True
                break

    if not found_brace:
        # No body: forward declaration or macro invocation.
        return None

    opener = lines_info[decl_end][0]
    if opener.count('{') <= opener.count('}'):
        # The body opens and closes on one line: "int f(void) { return 0; }".
        closing = decl_end
    else:
        closing = find_matching_brace(lines_info, decl_end + 1, 0)

    # Join every code line of the signature so multi-line argument lists are
    # captured in full.
    sig_lines = []
    for idx in range(start_idx, decl_end + 1):
        sig_line, sig_is_code, _ = lines_info[idx]
        if sig_is_code:
            sig_lines.append(sig_line)
    sig_text = ' '.join(sig_lines)

    paren_start = sig_text.find('(')
    paren_end = sig_text.rfind(')')
    if paren_start > 0 and paren_end > paren_start:
        args = sig_text[paren_start + 1:paren_end].strip()
    else:
        args = ''

    return {
        'type': 'function',
        'name': name,
        'args': args,
        'ret': ret_type,
        'pre': pre_lines,
        'start': start_idx,
        'end': closing,
        'line_count': closing - start_idx + 1
    }
def _print_declaration(table, contents, file, name):
    """Print one declaration together with its leading comment lines.

    Args:
        table: Dict mapping file path to a list of declaration dicts.
        contents: Dict mapping file path to the list of its source lines.
        file: Path of the file to search.
        name: Name of the declaration to print.

    Returns:
        True when the declaration was found and printed, False otherwise.
    """
    for decl in table.get(file, []):
        if decl['name'] == name:
            # Slice the lines of the requested file, not of whichever file
            # happened to be parsed last.
            source = contents[file]
            body = source[decl['start']:decl['end'] + 1]
            print('\n'.join(decl['pre'] + body))
            return True
    return False


def main():
    """Run the command-line interface.

    Commands: ``toc`` lists every function, struct and enum of all files
    named in the file list; ``func``, ``struct`` and ``enum`` print one
    declaration of the given file. Exits with status 1 on a usage error, an
    unknown command, or when the requested declaration is not found.
    """
    if len(sys.argv) < 2:
        print("Usage: c_util toc | func <file> <funcname> | struct <file> <structname> | enum <file> <enumname>")
        sys.exit(1)

    cmd = sys.argv[1]

    # command -> (usage placeholder, label used in the "not found" message)
    lookups = {
        'func': ('<funcname>', 'Function'),
        'struct': ('<structname>', 'Struct'),
        'enum': ('<enumname>', 'Enum'),
    }

    # Reject bad invocations before any file is read or parsed.
    if cmd != 'toc' and cmd not in lookups:
        print("Unknown command")
        sys.exit(1)
    if cmd in lookups and len(sys.argv) < 4:
        print(f"Usage: c_util {cmd} <file> {lookups[cmd][0]}")
        sys.exit(1)

    files = read_filelist()
    contents = load_files(files)

    # Parse all files
    tables = {'func': {}, 'struct': {}, 'enum': {}}
    for path, lines in contents.items():
        funcs, strs, enums = extract_declarations(lines)
        tables['func'][path] = funcs
        tables['struct'][path] = strs
        tables['enum'][path] = enums

    if cmd == 'toc':
        for f in files:
            # Files that could not be read were skipped by load_files.
            if f not in contents:
                continue
            print(f"File: {f}")
            for func in tables['func'].get(f, []):
                print(f"Function: {func['ret']} {func['name']}({func['args']}) - {func['line_count']} lines")
            for s in tables['struct'].get(f, []):
                print(f"Struct: {s['name']} - {s['line_count']} lines")
            for e in tables['enum'].get(f, []):
                print(f"Enum: {e['name']} - {e['line_count']} lines")
        return

    target_file = sys.argv[2]
    target_name = sys.argv[3]
    if not _print_declaration(tables[cmd], contents, target_file, target_name):
        print(f"{lookups[cmd][1]} not found")
        sys.exit(1)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()