#!/usr/bin/env python3
"""
c_util - C code navigation utility

Commands:
  toc                   - Show table of contents for all files
  func <file> <name>    - Show function code
  struct <file> <name>  - Show struct definition
  enum <file> <name>    - Show enum definition

The files to process are listed, one path per line, in ``filelist.txt`` in
the current working directory.

The parser is line-based and heuristic: it does not run the C preprocessor
and only recognises declarations that start at brace level 0.
"""

import sys
import re

C_KEYWORDS = {
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int',
    'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static',
    'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile',
    'while', '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex',
    '_Generic', '_Imaginary', '_Noreturn', '_Static_assert', '_Thread_local'
}

CONTROL_FLOW = {'if', 'for', 'while', 'switch', 'return', 'else'}

# Tokens that must be recognised before braces, parentheses or semicolons on
# a line can be trusted.  Alternatives are tried left to right at each
# position, so a "//" or "/*" inside a string literal is consumed as part of
# the literal and never mistaken for a comment.
_TOKEN_RE = re.compile(
    r'(?P<lit>"(?:\\.|[^"\\])*"'   # string literal
    r"|'(?:\\.|[^'\\])*')"         # character literal
    r"|//.*"                       # line comment
    r"|/\*.*?\*/"                  # block comment closed on the same line
    r"|(?P<open>/\*.*)"            # block comment still open at end of line
)

# Human-readable label for each single-declaration lookup command.
_LOOKUP_LABELS = {'func': 'Function', 'struct': 'Struct', 'enum': 'Enum'}


def _scrub(line, keep_literals=True):
    """
    Remove comments (and optionally literals) from one source line.

    Returns (text, opens_comment). Comments are replaced by a single space.
    When keep_literals is False, string and character literals are replaced
    by an empty string literal so that braces inside them are not counted.
    opens_comment is True when the line ends inside an unterminated /* block.
    """
    opens_comment = False

    def replace(match):
        nonlocal opens_comment
        if match.lastgroup == 'lit':
            return match.group() if keep_literals else '""'
        if match.lastgroup == 'open':
            opens_comment = True
        return ' '

    return _TOKEN_RE.sub(replace, line), opens_comment


def _code_text(line):
    """Return the line with comments removed and outer whitespace stripped."""
    return _scrub(line)[0].strip()


def _brace_delta(line):
    """Return net brace change of a line, ignoring literals and comments."""
    code, _ = _scrub(line, keep_literals=False)
    return code.count('{') - code.count('}')


def _find_closing_paren(text, open_pos):
    """
    Return the index of the ')' matching the '(' at open_pos, or -1 when the
    parenthesis is not closed within text.
    """
    depth = 0
    for pos in range(open_pos, len(text)):
        ch = text[pos]
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return pos
    return -1


def read_filelist(filename='filelist.txt'):
    """Read list of files to process (one path per line, blanks ignored)."""
    with open(filename, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def load_files(files):
    """
    Load contents of all files.

    Returns a dict mapping each readable path to its list of lines (without
    line terminators). Files that cannot be opened are reported on stderr and
    left out of the result. Undecodable bytes are replaced rather than
    causing the whole file to be skipped.
    """
    contents = {}
    for f in files:
        try:
            with open(f, 'r', errors='replace') as fd:
                contents[f] = fd.read().splitlines()
        except (OSError, ValueError) as e:
            print(f"Warning: could not read {f}: {e}", file=sys.stderr)
    return contents


def preprocess_lines(lines):
    """
    Classify lines as code or non-code and collect leading comments.

    Non-code lines are:
    - preprocessor directives (#...), including backslash-continued lines
    - lines that contain only comments (/* */ and //)
    - blank lines

    A line that has code followed by a trailing comment is code.

    Returns list of tuples: (original_line, is_code, pre_lines), one per
    input line. pre_lines holds the comment lines seen since the previous
    code line; it is always empty for non-code lines.
    """
    result = []
    pending_comments = []
    in_comment = False    # inside a /* ... */ block spanning several lines
    in_directive = False  # inside a backslash-continued # directive

    for line in lines:
        stripped = line.strip()

        if in_comment:
            if '*/' in line:
                in_comment = False
            pending_comments.append(line)
            result.append((line, False, []))
            continue

        if in_directive:
            # A macro body is not code; it ends at the first line that does
            # not itself end with a backslash.
            in_directive = stripped.endswith('\\')
            result.append((line, False, []))
            continue

        # Empty line - keep as separator but not code
        if not stripped:
            result.append((line, False, []))
            continue

        code, opens_comment = _scrub(line)

        # Preprocessor directive - not code and not part of pre_lines
        if stripped.startswith('#'):
            in_comment = opens_comment
            in_directive = stripped.endswith('\\') and not opens_comment
            result.append((line, False, []))
            continue

        in_comment = opens_comment

        # Nothing left once comments are removed: comment-only line
        if not code.strip():
            pending_comments.append(line)
            result.append((line, False, []))
            continue

        # This is actual code; it takes ownership of the pending comments
        result.append((line, True, pending_comments))
        pending_comments = []

    return result


def compute_brace_levels(lines_info):
    """
    Compute brace nesting level for each line.

    Returns list of levels (level before processing the line). Braces in
    comments and literals are ignored. The level never drops below zero.
    """
    levels = []
    brace_level = 0

    for line, is_code, _ in lines_info:
        levels.append(brace_level)
        if not is_code:
            continue

        # extern "C" { only opens a linkage block (C++), not real nesting,
        # so its brace is not counted.
        if 'extern "C"' in line:
            continue

        # Clamp at zero: the closing brace of an extern "C" block has no
        # counted opener and must not push later declarations below level 0.
        brace_level = max(0, brace_level + _brace_delta(line))

    return levels


def extract_identifier_before_paren(line):
    """
    Extract function name before first '('.

    Returns (name, return_type) or (None, None) if invalid. The return type
    is everything before the name, including pointer stars written against
    the name ("char *foo(" gives "char *"). It is 'void' when nothing
    precedes the name.
    """
    paren_pos = line.find('(')
    if paren_pos <= 0:
        return None, None

    # e.g., "static int * func_name" -> ["static", "int", "*", "func_name"]
    parts = line[:paren_pos].split()
    if not parts:
        return None, None

    # Last part should be the function name, possibly prefixed with '*'
    last = parts[-1]
    name = last.strip('*').strip()

    if not name.isidentifier():
        return None, None
    if name in C_KEYWORDS or name in CONTROL_FLOW:
        return None, None

    # Keep leading stars: they belong to the return type, not the name
    stars = last[:len(last) - len(last.lstrip('*'))]
    type_parts = parts[:-1] + ([stars] if stars else [])
    ret_type = ' '.join(type_parts) if type_parts else 'void'

    return name, ret_type


def find_matching_brace(lines_info, start_idx, start_level, open_depth=1):
    """
    Find line index of matching closing brace.

    start_idx is the first line after the line(s) that opened the block and
    start_level is the level outside the block. open_depth is how many
    braces are open at start_idx relative to start_level. Returns the index
    of the first line after which the level is back at start_level, or the
    last line index when the block is never closed.
    """
    level = start_level + open_depth

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        level += _brace_delta(line)
        if level <= start_level:
            return i

    return len(lines_info) - 1


def extract_declarations(lines):
    """
    Extract all function, struct, and enum declarations.

    Returns (functions, structs, enums). Each entry is a dict with at least
    'name', 'pre' (leading comment lines), 'start' and 'end' (inclusive
    0-based line indices) and 'line_count'.
    """
    functions = []
    structs = []
    enums = []

    lines_info = preprocess_lines(lines)
    brace_levels = compute_brace_levels(lines_info)

    # (predicate on comment-free text, parser, destination list)
    block_parsers = (
        (lambda c: c.startswith('typedef struct'),
         parse_typedef_struct, structs),
        (lambda c: c.startswith('typedef enum'),
         parse_typedef_enum, enums),
        (lambda c: c.startswith('struct ') and '{' in c,
         parse_simple_struct, structs),
        (lambda c: c.startswith('enum ') and '{' in c,
         parse_simple_enum, enums),
    )

    i = 0
    n = len(lines_info)
    while i < n:
        line, is_code, pre_lines = lines_info[i]

        # Only process code at level 0
        if not is_code or brace_levels[i] != 0:
            i += 1
            continue

        code = _code_text(line)

        consumed = False
        for matches, parser, bucket in block_parsers:
            if not matches(code):
                continue
            name, decl_end = parser(lines_info, i)
            if name:
                bucket.append({
                    'name': name,
                    'pre': pre_lines,
                    'start': i,
                    'end': decl_end,
                    'line_count': decl_end - i + 1
                })
                i = decl_end + 1
                consumed = True
                break
        if consumed:
            continue

        # Check for function definitions
        if '(' in code:
            func_info = parse_function(lines_info, i)
            if func_info:
                functions.append(func_info)
                i = func_info['end'] + 1
                continue

        i += 1

    return functions, structs, enums


def _parse_typedef_block(lines_info, start_idx):
    """
    Parse "typedef <kind> { ... } name;" starting at start_idx.

    Returns (name, end_idx). name is None when the declaration has no body
    (e.g. "typedef struct foo foo_t;") or is never terminated.
    """
    decl_parts = []
    depth = 0

    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue

        code = _code_text(line)
        decl_parts.append(code)
        depth += _brace_delta(line)

        # All braces closed and a semicolon reached: declaration complete
        if depth == 0 and code.endswith(';'):
            decl_text = ' '.join(decl_parts)
            close_brace = decl_text.rfind('}')
            semi = decl_text.find(';', close_brace)
            if close_brace > 0 and semi > close_brace:
                # The name sits between the closing brace and the ';'
                return decl_text[close_brace + 1:semi].strip(), i
            return None, i

    return None, len(lines_info) - 1


def _parse_tagged_block(lines_info, start_idx, keyword):
    """
    Parse "<keyword> name { ... };" starting at start_idx.

    Returns (name, end_idx), or (None, start_idx) when the line does not
    have that shape. The scan starts on the opening line itself, so a block
    opened and closed on one line ends on that line.
    """
    match = re.match(keyword + r'\s+(\w+)\s*\{',
                     _code_text(lines_info[start_idx][0]))
    if not match:
        return None, start_idx

    name = match.group(1)
    depth = 0
    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        depth += _brace_delta(line)
        if depth <= 0:
            return name, i

    return name, len(lines_info) - 1


def parse_typedef_struct(lines_info, start_idx):
    """Parse typedef struct { ... } name; and return (name, end_idx)."""
    return _parse_typedef_block(lines_info, start_idx)


def parse_typedef_enum(lines_info, start_idx):
    """Parse typedef enum { ... } name; and return (name, end_idx)."""
    return _parse_typedef_block(lines_info, start_idx)


def parse_simple_struct(lines_info, start_idx):
    """Parse struct name { ... }; and return (name, end_idx)."""
    return _parse_tagged_block(lines_info, start_idx, 'struct')


def parse_simple_enum(lines_info, start_idx):
    """Parse enum name { ... }; and return (name, end_idx)."""
    return _parse_tagged_block(lines_info, start_idx, 'enum')


def parse_function(lines_info, start_idx):
    """
    Parse function definition at level 0.

    Returns a dict describing the function ('type', 'name', 'args', 'ret',
    'pre', 'start', 'end', 'line_count'), or None when the line is not the
    start of a function definition (prototype, macro call, keyword, ...).
    """
    line, _, pre_lines = lines_info[start_idx]
    code = _code_text(line)

    name, ret_type = extract_identifier_before_paren(code)
    if not name:
        return None

    # Forward declaration (ends with ;)
    if code.endswith(';'):
        return None

    # Find the line holding the opening brace of the body. Look after the
    # parenthesis that closes the parameter list, not after the last ')'
    # on the line, so that one-line bodies containing calls still work.
    decl_end = None
    close_pos = _find_closing_paren(code, code.find('('))
    if close_pos != -1 and '{' in code[close_pos:]:
        decl_end = start_idx
    else:
        # Otherwise look at the next 3 lines for the opening brace
        for offset in range(1, 4):
            idx = start_idx + offset
            if idx >= len(lines_info):
                break
            check_line, check_is_code, _ = lines_info[idx]
            if not check_is_code:
                continue
            check_code = _code_text(check_line)
            # A semicolon before any brace means prototype, not definition
            if check_code.endswith(';'):
                return None
            if '{' in check_code:
                decl_end = idx
                break

    if decl_end is None:
        # No body - probably forward declaration or macro, skip
        return None

    # Code lines that make up the signature (start line through brace line)
    sig_lines = [lines_info[idx][0]
                 for idx in range(start_idx, decl_end + 1)
                 if lines_info[idx][1]]

    # Find closing brace. If the signature lines already balance their
    # braces, the whole function lives on those lines.
    open_depth = sum(_brace_delta(sig_line) for sig_line in sig_lines)
    if open_depth <= 0:
        closing = decl_end
    else:
        closing = find_matching_brace(lines_info, decl_end + 1, 0, open_depth)

    # Extract arguments from the (possibly multi-line) signature
    sig_text = ' '.join(_code_text(sig_line) for sig_line in sig_lines)
    paren_start = sig_text.find('(')
    paren_end = _find_closing_paren(sig_text, paren_start)
    if paren_end > paren_start > 0:
        args = sig_text[paren_start + 1:paren_end].strip()
    else:
        args = ''

    return {
        'type': 'function',
        'name': name,
        'args': args,
        'ret': ret_type,
        'pre': pre_lines,
        'start': start_idx,
        'end': closing,
        'line_count': closing - start_idx + 1
    }


def _print_declaration(contents, declarations, path, name):
    """
    Print the named declaration of a file, preceded by its leading comments.

    Returns True when the declaration was found and printed.
    """
    for decl in declarations.get(path, []):
        if decl['name'] == name:
            body = contents[path][decl['start']:decl['end'] + 1]
            print('\n'.join(decl['pre'] + body))
            return True
    return False


def main():
    """Command-line entry point; see the module docstring for commands."""
    if len(sys.argv) < 2:
        print("Usage: c_util toc | func <file> <name> | "
              "struct <file> <name> | enum <file> <name>")
        sys.exit(1)

    cmd = sys.argv[1]
    if cmd != 'toc' and cmd not in _LOOKUP_LABELS:
        print("Unknown command")
        sys.exit(1)
    if cmd != 'toc' and len(sys.argv) < 4:
        print(f"Usage: c_util {cmd} <file> <name>")
        sys.exit(1)

    try:
        files = read_filelist()
    except OSError as e:
        print(f"Error: could not read file list: {e}", file=sys.stderr)
        sys.exit(1)
    contents = load_files(files)

    # Parse all files
    project = {'func': {}, 'struct': {}, 'enum': {}}
    for f, lines in contents.items():
        funcs, strs, enums = extract_declarations(lines)
        project['func'][f] = funcs
        project['struct'][f] = strs
        project['enum'][f] = enums

    if cmd == 'toc':
        for f in files:
            if f not in contents:
                continue
            print(f"File: {f}")
            for func in project['func'].get(f, []):
                print(f"Function: {func['ret']} {func['name']}"
                      f"({func['args']}) - {func['line_count']} lines")
            for s in project['struct'].get(f, []):
                print(f"Struct: {s['name']} - {s['line_count']} lines")
            for e in project['enum'].get(f, []):
                print(f"Enum: {e['name']} - {e['line_count']} lines")
        return

    if _print_declaration(contents, project[cmd], sys.argv[2], sys.argv[3]):
        sys.exit(0)
    print(f"{_LOOKUP_LABELS[cmd]} not found")
    sys.exit(1)


if __name__ == "__main__":
    main()