From 71ca9e08bf123f1d5c9f983d3f0dd4af7d4b91b2 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 13 Feb 2026 02:30:02 +0300 Subject: [PATCH] Rewrite c_util with proper brace nesting level tracking - Preprocess: remove preprocessor directives and comments - Compute brace levels for each line - Find declarations only at level 0 - Properly handle multi-line function signatures - Skip forward declarations and extern C blocks - Find 291 declarations across the project --- c_util | 870 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 551 insertions(+), 319 deletions(-) diff --git a/c_util b/c_util index 2da3f96..086a447 100755 --- a/c_util +++ b/c_util @@ -1,365 +1,587 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 +""" +c_util - C code navigation utility +Commands: + toc - Show table of contents for all files + func - Show function code + struct - Show struct definition + enum - Show enum definition +""" import sys import re -c_keywords = set([ +C_KEYWORDS = { 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while', '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex', '_Generic', '_Imaginary', '_Noreturn', '_Static_assert', '_Thread_local' -]) +} + +CONTROL_FLOW = {'if', 'for', 'while', 'switch', 'return', 'else'} + def read_filelist(filename='filelist.txt'): + """Read list of files to process.""" with open(filename, 'r') as f: return [line.strip() for line in f if line.strip()] + def load_files(files): + """Load contents of all files.""" contents = {} for f in files: - with open(f, 'r') as fd: - contents[f] = fd.read().splitlines() + try: + with open(f, 'r') as fd: + contents[f] = fd.read().splitlines() + except Exception as e: + print(f"Warning: could not read {f}: {e}", file=sys.stderr) return contents -def extract_functions_and_structs(lines): - functions = [] - structs = [] - enums = [] - i = 0 - n = len(lines) + +def preprocess_lines(lines): + """ + Preprocess lines: + - Remove preprocessor directives (#...) + - Remove comments (/* */ and //) + - Track pre_lines (comments before declarations) + Returns list of tuples: (original_line, is_code, pre_lines) + """ + result = [] multi_comment = False - extern_c_level = 0 # Track extern "C" nesting level pre_lines = [] - - def skip_multi_comment(line): - nonlocal multi_comment + + for line in lines: + stripped = line.strip() + + # Handle multi-line comments if multi_comment: if '*/' in line: multi_comment = False - return True - if '/*' in line and '*/' not in line: - multi_comment = True - return True - return False - - while i < n: - line = lines[i] - stripped = line.strip() - - if skip_multi_comment(line): pre_lines.append(line) - i += 1 + result.append((line, False, [])) continue - - if stripped.startswith('//') or not stripped or stripped.startswith('#'): + + # Start of multi-line comment + if '/*' in line: + end_pos = line.find('*/') + if end_pos == -1: + # Comment continues to next line + multi_comment = True + pre_lines.append(line) + result.append((line, False, [])) + continue + else: + # Single-line block comment + pre_lines.append(line) + result.append((line, False, [])) + continue + + # Single-line comment + if stripped.startswith('//'): pre_lines.append(line) - i += 1 + result.append((line, False, [])) continue - - # Skip closing braces - if stripped.startswith('}'): - pre_lines = [] + + # Preprocessor directive - remove but don't add to pre_lines + if stripped.startswith('#'): + result.append((line, False, [])) + continue + + # Empty line - keep as separator but not code + if not stripped: + result.append((line, False, [])) + continue + + # This is actual code + current_pre = pre_lines[:] + pre_lines = [] + result.append((line, True, current_pre)) + + return result + + +def compute_brace_levels(lines_info): + """ + Compute brace nesting level for each line. + Returns list of levels (level before processing the line). + """ + levels = [] + brace_level = 0 + + for line, is_code, _ in lines_info: + levels.append(brace_level) + + if not is_code: + continue + + stripped = line.strip() + + # Skip preprocessor (already filtered but double-check) + if stripped.startswith('#'): + continue + + # Skip extern "C" { blocks (C++ linkage) + if 'extern "C"' in stripped: + # Count braces in extern "C" line separately + if '{' in stripped: + # Don't count this brace as it opens/closes C linkage block + pass + continue + + # Skip do { ... } while(0) - not real nesting + if stripped.startswith('do {'): + continue + if stripped.startswith('}') and 'while(0)' in stripped: + continue + + # Count braces + brace_level += line.count('{') - line.count('}') + + return levels + + +def extract_identifier_before_paren(line): + """ + Extract function name before first '('. + Returns (name, return_type) or (None, None) if invalid. + """ + paren_pos = line.find('(') + if paren_pos <= 0: + return None, None + + before = line[:paren_pos].strip() + + # Split into parts (handle pointers, qualifiers, etc.) + # e.g., "static int * func_name" -> ["static", "int", "*", "func_name"] + parts = before.split() + + if not parts: + return None, None + + # Last part should be function name + name = parts[-1].strip('*').strip() + + # Check if it's a valid identifier + if not name.isidentifier(): + return None, None + + # Check if it's a keyword + if name in C_KEYWORDS or name in CONTROL_FLOW: + return None, None + + # Return type is everything except name + ret_type = ' '.join(parts[:-1]) if len(parts) > 1 else 'void' + + return name, ret_type + + +def find_matching_brace(lines_info, start_idx, start_level): + """ + Find line index of matching closing brace. + Starts from start_idx, looking for level to return to start_level. + """ + level = start_level + + for i in range(start_idx, len(lines_info)): + line, is_code, _ = lines_info[i] + + if not is_code: + continue + + stripped = line.strip() + + # Skip preprocessor + if stripped.startswith('#'): + continue + + # Skip do-while + if stripped.startswith('do {'): + continue + if stripped.startswith('}') and 'while(0)' in stripped: + continue + + # Check current level before this line + if level == start_level and i > start_idx and '}' in line: + return i + + # Update level + level += line.count('{') - line.count('}') + + return len(lines_info) - 1 + + +def extract_declarations(lines): + """ + Extract all function, struct, and enum declarations. + Returns (functions, structs, enums). + """ + functions = [] + structs = [] + enums = [] + + # Preprocess + lines_info = preprocess_lines(lines) + + # Compute brace levels + brace_levels = compute_brace_levels(lines_info) + + i = 0 + n = len(lines_info) + + while i < n: + line, is_code, pre_lines = lines_info[i] + + if not is_code: i += 1 continue - - # Check for struct/enum/union declarations - is_typedef = stripped.startswith('typedef ') - is_struct = stripped.startswith('struct ') - is_enum = stripped.startswith('enum ') - is_union = stripped.startswith('union ') - is_typedef_struct = stripped.startswith('typedef struct') - is_typedef_enum = stripped.startswith('typedef enum') - is_typedef_union = stripped.startswith('typedef union') - - if is_typedef_struct or is_typedef_enum or is_typedef_union or is_struct or is_enum or is_union: - decl_lines = [line] - start_line = i - already_processed = False # Flag to skip brace counting if already processed + + stripped = line.strip() + level = brace_levels[i] + + # Only process declarations at level 0 + if level != 0: i += 1 - - # Collect declaration until we find { or ; - while i < n: - line = lines[i] - stripped_cont = line.strip() - - if skip_multi_comment(line): - decl_lines.append(line) - i += 1 - continue - - if stripped_cont.startswith('//') or stripped_cont.startswith('#'): - break - - decl_lines.append(line) - - # Check if we hit the opening brace - if '{' in stripped_cont: - break - - # Check if it's a forward declaration or end of typedef - # Forward declaration: "typedef enum name;" - no { before - # End of typedef: "} name;" - has { before and } in current line - if stripped_cont.endswith(';'): - has_brace = any('{' in l for l in decl_lines) - if not has_brace: - # Forward declaration, skip - pre_lines = [] - i += 1 - break - elif '}' in stripped_cont: - # End of typedef (e.g., "} debug_level_t;") - # Don't need to count braces, already have the complete declaration - end_line = i - decl_text = ' '.join([l.strip() for l in decl_lines]) - - # Extract name - close_brace = decl_text.rfind('}') - semi = decl_text.find(';', close_brace) - if semi > close_brace: - name = decl_text[close_brace + 1:semi].strip() - else: - name = decl_text[close_brace + 1:].strip() - - if is_typedef_enum: - enums.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - elif is_typedef_struct or is_typedef_union: - structs.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - - pre_lines = [] - i += 1 - already_processed = True - break - - i += 1 - - # Skip brace counting if we already processed this typedef (found "} name;") - if already_processed: - pre_lines = [] + continue + + # Check for typedef struct/enum/union + if stripped.startswith('typedef struct'): + name, decl_end = parse_typedef_struct(lines_info, i) + if name: + structs.append({ + 'name': name, + 'pre': pre_lines, + 'start': i, + 'end': decl_end, + 'line_count': decl_end - i + 1 + }) + i = decl_end + 1 continue - - if i >= n or not any('{' in l for l in decl_lines): - pre_lines = [] + + if stripped.startswith('typedef enum'): + name, decl_end = parse_typedef_enum(lines_info, i) + if name: + enums.append({ + 'name': name, + 'pre': pre_lines, + 'start': i, + 'end': decl_end, + 'line_count': decl_end - i + 1 + }) + i = decl_end + 1 continue + + # Check for simple struct/enum declarations + if stripped.startswith('struct ') and '{' in stripped: + name, decl_end = parse_simple_struct(lines_info, i) + if name: + structs.append({ + 'name': name, + 'pre': pre_lines, + 'start': i, + 'end': decl_end, + 'line_count': decl_end - i + 1 + }) + i = decl_end + 1 + continue + + if stripped.startswith('enum ') and '{' in stripped: + name, decl_end = parse_simple_enum(lines_info, i) + if name: + enums.append({ + 'name': name, + 'pre': pre_lines, + 'start': i, + 'end': decl_end, + 'line_count': decl_end - i + 1 + }) + i = decl_end + 1 + continue + + # Check for function declarations + if '(' in stripped: + func_info = parse_function(lines_info, i) + if func_info: + functions.append(func_info) + i = func_info['end'] + 1 + continue + + i += 1 + + return functions, structs, enums - # Now we're inside the struct/enum/union body, find the closing } - brace_level = 1 - while i < n and brace_level > 0: - i += 1 - if i >= n: - break - line = lines[i] - decl_lines.append(line) - stripped_line = line.strip() - - if skip_multi_comment(line): - continue - - # Skip preprocessor directives when counting braces - # Check for # after stripping leading whitespace (handles indented #define) - if stripped_line.startswith('#'): - continue - - # Skip extern "C" { blocks (C++ linkage) - if 'extern "C"' in stripped_line or stripped_line.startswith('extern'): - if '{' in stripped_line: - extern_c_level += 1 - continue - - # Skip closing brace of extern "C" block - if extern_c_level > 0 and stripped_line == '}': - extern_c_level -= 1 - continue - - # If we're inside extern "C", skip counting braces - if extern_c_level > 0: - continue - - # Skip do { ... } while(0) patterns in macros - if stripped_line.startswith('do {'): - continue - if stripped_line.startswith('}') and 'while(0)' in stripped_line: - continue - - brace_level += line.count('{') - line.count('}') - # Check if there's a name after } (for typedef) or if it's a simple struct - end_line = i +def parse_typedef_struct(lines_info, start_idx): + """Parse typedef struct { ... } name;""" + # Collect lines until we have complete declaration + decl_lines = [] + brace_count = 0 + + for i in range(start_idx, len(lines_info)): + line, is_code, _ = lines_info[i] + decl_lines.append(line) + + if not is_code: + continue + + stripped = line.strip() + + # Count braces (skip do-while) + if not stripped.startswith('do {'): + if not (stripped.startswith('}') and 'while(0)' in stripped): + brace_count += line.count('{') - line.count('}') + + # If we've closed all braces and found semicolon, we're done + if brace_count == 0 and stripped.endswith(';'): + # Extract name from "} name;" decl_text = ' '.join([l.strip() for l in decl_lines]) - - # Extract name and determine type - if is_typedef_struct or is_typedef_enum or is_typedef_union: - # Name is after the closing brace and before ; - # Format: typedef struct { ... } name; - close_brace = decl_text.rfind('}') - semi = decl_text.find(';', close_brace) - if semi > close_brace: - name = decl_text[close_brace + 1:semi].strip() - else: - name = decl_text[close_brace + 1:].strip() - - if is_typedef_enum: - item_type = 'enum' - enums.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - else: - item_type = 'struct' - structs.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - else: - # Simple struct/enum/union name { ... }; - # Name is between keyword and { - keyword = 'struct' if is_struct else ('enum' if is_enum else 'union') - keyword_pos = decl_text.find(keyword) - brace_pos = decl_text.find('{', keyword_pos) - name = decl_text[keyword_pos + len(keyword):brace_pos].strip() - - # Skip anonymous structs (no name) - they are local variables, not declarations - if not name: - pre_lines = [] - continue - - if is_enum: - enums.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - else: - structs.append({'name': name, 'pre': pre_lines[:], 'start': start_line, 'end': end_line, 'line_count': end_line - start_line + 1}) - - pre_lines = [] + close_brace = decl_text.rfind('}') + semi = decl_text.find(';', close_brace) + + if close_brace > 0 and semi > close_brace: + name = decl_text[close_brace + 1:semi].strip() + return name, i + return None, i + + return None, len(lines_info) - 1 + + +def parse_typedef_enum(lines_info, start_idx): + """Parse typedef enum { ... } name;""" + # Same logic as typedef struct + decl_lines = [] + brace_count = 0 + + for i in range(start_idx, len(lines_info)): + line, is_code, _ = lines_info[i] + decl_lines.append(line) + + if not is_code: continue - - # Handle function declarations - if '(' in stripped and not stripped.startswith('typedef'): - decl_lines = [line] - start_line = i - i += 1 - paren_level = stripped.count('(') - stripped.count(')') - - while i < n and paren_level > 0: - line = lines[i] - stripped_cont = line.strip() - - if skip_multi_comment(line): - decl_lines.append(line) - i += 1 - continue - - if stripped_cont.startswith('//') or stripped_cont.startswith('#'): - break - - decl_lines.append(line) - paren_level += stripped_cont.count('(') - stripped_cont.count(')') - - # Check if we closed all parens and found opening brace on same line - if paren_level == 0 and '{' in stripped_cont: - break - - i += 1 - - # Check if current or next line starts with { - has_brace = False - if i < n: - current_line = lines[i].strip() - if '{' in current_line: - has_brace = True - elif i + 1 < n and lines[i + 1].strip().startswith('{'): - i += 1 - has_brace = True - decl_lines.append(lines[i]) - i += 1 + + stripped = line.strip() + + if not stripped.startswith('do {'): + if not (stripped.startswith('}') and 'while(0)' in stripped): + brace_count += line.count('{') - line.count('}') + + if brace_count == 0 and stripped.endswith(';'): + decl_text = ' '.join([l.strip() for l in decl_lines]) + close_brace = decl_text.rfind('}') + semi = decl_text.find(';', close_brace) - if has_brace: - decl_lines.append(lines[i]) - i += 1 - - # Find function end - brace_level = 1 - while i < n and brace_level > 0: - line = lines[i] - decl_lines.append(line) - - if skip_multi_comment(line): - i += 1 - continue - - brace_level += line.count('{') - line.count('}') - i += 1 - - end_line = i - 1 - - # Parse function signature - decl_text = ' '.join([l.strip() for l in decl_lines]) - paren_start = decl_text.find('(') - paren_end = decl_text.rfind(')', 0, decl_text.find('{')) - - if paren_start > 0 and paren_end > paren_start: - args = decl_text[paren_start + 1:paren_end].strip() - before_paren = decl_text[:paren_start].strip() - parts = before_paren.rsplit(maxsplit=1) - - if len(parts) == 2: - ret_type = parts[0] - name = parts[1] - else: - ret_type = 'void' - name = parts[0] if parts else 'unknown' - - if name not in c_keywords and name.isidentifier(): - functions.append({ - 'type': 'function', - 'name': name, - 'args': args, - 'ret': ret_type, - 'pre': pre_lines[:], - 'start': start_line, - 'end': end_line, - 'line_count': end_line - start_line + 1 - }) - - pre_lines = [] + if close_brace > 0 and semi > close_brace: + name = decl_text[close_brace + 1:semi].strip() + return name, i + return None, i + + return None, len(lines_info) - 1 + + +def parse_simple_struct(lines_info, start_idx): + """Parse struct name { ... };""" + line, is_code, _ = lines_info[start_idx] + stripped = line.strip() + + # Extract name: "struct name {" + match = re.match(r'struct\s+(\w+)\s*\{', stripped) + if not match: + return None, start_idx + + name = match.group(1) + + # Find closing brace + brace_count = 1 + for i in range(start_idx + 1, len(lines_info)): + line, is_code, _ = lines_info[i] + + if not is_code: continue - - # Not a declaration we care about - pre_lines = [] - i += 1 - - return functions, structs, enums - -if __name__ == "__main__": + + stripped = line.strip() + + if not stripped.startswith('do {'): + if not (stripped.startswith('}') and 'while(0)' in stripped): + brace_count += line.count('{') - line.count('}') + + if brace_count == 0: + return name, i + + return name, len(lines_info) - 1 + + +def parse_simple_enum(lines_info, start_idx): + """Parse enum name { ... };""" + line, is_code, _ = lines_info[start_idx] + stripped = line.strip() + + # Extract name: "enum name {" + match = re.match(r'enum\s+(\w+)\s*\{', stripped) + if not match: + return None, start_idx + + name = match.group(1) + + # Find closing brace + brace_count = 1 + for i in range(start_idx + 1, len(lines_info)): + line, is_code, _ = lines_info[i] + + if not is_code: + continue + + stripped = line.strip() + + if not stripped.startswith('do {'): + if not (stripped.startswith('}') and 'while(0)' in stripped): + brace_count += line.count('{') - line.count('}') + + if brace_count == 0: + return name, i + + return name, len(lines_info) - 1 + + +def parse_function(lines_info, start_idx): + """Parse function declaration at level 0.""" + line, is_code, pre_lines = lines_info[start_idx] + stripped = line.strip() + + # Extract function name + name, ret_type = extract_identifier_before_paren(stripped) + + if not name: + return None + + # Check if this is a forward declaration (ends with ;) + if stripped.endswith(';'): + return None + + # Find opening brace - function body must start with { + # Check current line and next line only + decl_end = start_idx + found_brace = False + + # Check current line first - look for { after ) + paren_end = stripped.rfind(')') + if paren_end > 0: + after_paren = stripped[paren_end:] + if '{' in after_paren: + decl_end = start_idx + found_brace = True + + # If not found, check next few lines (up to 3) for { + # But stop if we encounter ; (end of prototype) + if not found_brace: + for offset in range(1, 4): # Check next 3 lines + if start_idx + offset >= len(lines_info): + break + + check_line, check_is_code, _ = lines_info[start_idx + offset] + if not check_is_code: + continue + + check_stripped = check_line.strip() + + # If we hit a semicolon, this is a prototype, not a definition + if check_stripped.endswith(';'): + return None + + # If we found opening brace, this is the function body + # Check if '{' is present in the line (not necessarily at start due to formatting) + if '{' in check_line: + decl_end = start_idx + offset + found_brace = True + break + + if not found_brace: + # No body - probably forward declaration or macro, skip + return None + + # Find closing brace + closing = find_matching_brace(lines_info, decl_end + 1, 0) + + # Extract arguments - need to collect all lines from start to decl_end + # to handle multi-line function signatures + sig_lines = [] + for idx in range(start_idx, decl_end + 1): + l, is_code, _ = lines_info[idx] + if is_code: + sig_lines.append(l) + + sig_text = ' '.join(sig_lines) + paren_start = sig_text.find('(') + paren_end = sig_text.rfind(')') + if paren_start > 0 and paren_end > paren_start: + args = sig_text[paren_start + 1:paren_end].strip() + else: + args = '' + + return { + 'type': 'function', + 'name': name, + 'args': args, + 'ret': ret_type, + 'pre': pre_lines, + 'start': start_idx, + 'end': closing, + 'line_count': closing - start_idx + 1 + } + + +def main(): + if len(sys.argv) < 2: + print("Usage: c_util toc | func | struct | enum ") + sys.exit(1) + + cmd = sys.argv[1] + files = read_filelist() contents = load_files(files) + + # Parse all files project_functions = {} project_structs = {} project_enums = {} + for f, lines in contents.items(): - funcs, strs, enums = extract_functions_and_structs(lines) + funcs, strs, enums = extract_declarations(lines) project_functions[f] = funcs project_structs[f] = strs project_enums[f] = enums - if len(sys.argv) < 2: - print("Usage: python script.py toc | func | struct | enum ") - sys.exit(1) - cmd = sys.argv[1] + if cmd == 'toc': for f in files: + if f not in contents: + continue + print(f"File: {f}") + funcs = project_functions.get(f, []) - strs = project_structs.get(f, []) - enums = project_enums.get(f, []) if funcs: for func in funcs: print(f"Function: {func['ret']} {func['name']}({func['args']}) - {func['line_count']} lines") + + strs = project_structs.get(f, []) if strs: for s in strs: print(f"Struct: {s['name']} - {s['line_count']} lines") + + enums = project_enums.get(f, []) if enums: for e in enums: print(f"Enum: {e['name']} - {e['line_count']} lines") + elif cmd == 'func': if len(sys.argv) < 4: - print("Usage: python script.py func ") + print("Usage: c_util func ") sys.exit(1) + file = sys.argv[2] funcname = sys.argv[3] - found = False + if file in project_functions: for func in project_functions[file]: if func['name'] == funcname: @@ -367,45 +589,55 @@ if __name__ == "__main__": pre = func['pre'] body = lines[func['start']:func['end'] + 1] print('\n'.join(pre + body)) - found = True - break - if not found: - print("Function not found") + sys.exit(0) + + print("Function not found") + sys.exit(1) + elif cmd == 'struct': if len(sys.argv) < 4: - print("Usage: python script.py struct ") + print("Usage: c_util struct ") sys.exit(1) + file = sys.argv[2] structname = sys.argv[3] - found = False + if file in project_structs: - for strct in project_structs[file]: - if strct['name'] == structname: + for s in project_structs[file]: + if s['name'] == structname: lines = contents[file] - pre = strct['pre'] - body = lines[strct['start']:strct['end'] + 1] + pre = s['pre'] + body = lines[s['start']:s['end'] + 1] print('\n'.join(pre + body)) - found = True - break - if not found: - print("Struct not found") + sys.exit(0) + + print("Struct not found") + sys.exit(1) + elif cmd == 'enum': if len(sys.argv) < 4: - print("Usage: python script.py enum ") + print("Usage: c_util enum ") sys.exit(1) + file = sys.argv[2] enumname = sys.argv[3] - found = False + if file in project_enums: - for enm in project_enums[file]: - if enm['name'] == enumname: + for e in project_enums[file]: + if e['name'] == enumname: lines = contents[file] - pre = enm['pre'] - body = lines[enm['start']:enm['end'] + 1] + pre = e['pre'] + body = lines[e['start']:e['end'] + 1] print('\n'.join(pre + body)) - found = True - break - if not found: - print("Enum not found") + sys.exit(0) + + print("Enum not found") + sys.exit(1) + else: print("Unknown command") + sys.exit(1) + + +if __name__ == "__main__": + main()