#!/usr/bin/env python3
"""
c_util - C code navigation utility

Commands:
    toc                                   - Show table of contents for all files
    description <name1> [name2] ...       - Show selected items with comments
    show <name1> [name2] ...              - Show code for functions/structs/enums
    edit <file> <start_line> <checksum1> [checksum2 ...]
                                          - Edit file with checksum verification

The parser is a line-oriented heuristic, not a C front end: it recognises
top-level function definitions, ``struct``/``enum`` definitions and
``typedef struct``/``typedef enum`` definitions by tracking brace depth.
"""

import math
import re
import sys
from functools import reduce

C_KEYWORDS = {
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int',
    'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static',
    'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile',
    'while', '_Alignas', '_Alignof', '_Atomic', '_Bool', '_Complex',
    '_Generic', '_Imaginary', '_Noreturn', '_Static_assert', '_Thread_local'
}

CONTROL_FLOW = {'if', 'for', 'while', 'switch', 'return', 'else'}

# Matches a C++ linkage specification that ends the line, with or without
# its opening brace: ``extern "C" {`` or ``extern "C"``.
_EXTERN_C_RE = re.compile(r'extern\s+"C(?:\+\+)?"\s*(\{)?\s*$')


def _split_lines(text):
    """Split *text* on '\\n' only, dropping the empty tail after a final newline.

    ``str.splitlines()`` also breaks on form feed, vertical tab and a few
    other characters that are legal inside a C source line; that would shift
    line numbers relative to every editor and corrupt the file when the
    ``edit`` command writes it back.
    """
    lines = text.split('\n')
    if lines and lines[-1] == '':
        lines.pop()
    return lines


def _scan_line(line, in_comment=False, blank_literals=True):
    """Strip comments from one physical line.

    Args:
        line: the raw source line.
        in_comment: True if the line starts inside a ``/* ... */`` comment.
        blank_literals: when True, the contents of string and character
            literals are dropped (only the quotes are kept) so that braces
            and semicolons inside literals are not mistaken for code.

    Returns:
        ``(code, in_comment_after)`` where *code* is the line with comments
        removed and *in_comment_after* tells whether a block comment is still
        open at the end of the line.
    """
    out = []
    i = 0
    n = len(line)
    while i < n:
        if in_comment:
            end = line.find('*/', i)
            if end == -1:
                return ''.join(out), True
            in_comment = False
            out.append(' ')  # a comment separates tokens like a space does
            i = end + 2
            continue
        ch = line[i]
        if ch == '"' or ch == "'":
            j = i + 1
            while j < n and line[j] != ch:
                j += 2 if line[j] == '\\' else 1  # skip escaped characters
            j = min(j + 1, n)  # include the closing quote when present
            out.append(ch + ch if blank_literals else line[i:j])
            i = j
        elif line.startswith('//', i):
            break
        elif line.startswith('/*', i):
            in_comment = True
            i += 2
        elif line.startswith('*/', i):
            # A stray terminator means the caller scanned this line without
            # comment state; everything before it was comment text.
            out.clear()
            i += 2
        else:
            out.append(ch)
            i += 1
    return ''.join(out), in_comment


def _code_text(line):
    """Return *line* without comments and with literal contents blanked."""
    return _scan_line(line)[0]


def _brace_delta(line):
    """Return (number of '{') - (number of '}') in the code part of *line*."""
    code = _code_text(line)
    return code.count('{') - code.count('}')


def read_filelist(filename='filelist.txt'):
    """Read list of files to process (one path per line, blanks ignored)."""
    with open(filename, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def load_files(files):
    """Load contents of all files.

    Returns a dict mapping each readable path to its list of lines. Files
    that cannot be read are reported on stderr and left out of the result.
    """
    contents = {}
    for path in files:
        try:
            with open(path, 'r') as fd:
                contents[path] = _split_lines(fd.read())
        except (OSError, UnicodeError) as e:
            print(f"Warning: could not read {path}: {e}", file=sys.stderr)
    return contents


def preprocess_lines(lines):
    """
    Classify every line as code or non-code and attach preceding comments.

    - Preprocessor directives (including their backslash-continued lines)
      are non-code and are not collected as comments.
    - Comment-only lines (``//`` or ``/* */``, possibly multi-line) are
      non-code and are collected.
    - Blank lines are non-code; they do not reset the collected comments.
    - A line that contains code next to a comment counts as code.

    Returns list of tuples: (original_line, is_code, pre_lines), where
    pre_lines holds the comment lines collected since the previous code line
    (always empty for non-code lines).
    """
    result = []
    pending_comments = []
    in_comment = False
    in_directive = False

    for line in lines:
        stripped = line.strip()
        began_in_comment = in_comment
        code, in_comment = _scan_line(line, in_comment)

        if in_directive or (not began_in_comment and stripped.startswith('#')):
            # A trailing backslash continues the directive on the next line.
            in_directive = stripped.endswith('\\')
            result.append((line, False, []))
            continue

        if code.strip():
            result.append((line, True, pending_comments))
            pending_comments = []
            continue

        # Blank lines inside a block comment are part of the comment text.
        if stripped or began_in_comment:
            pending_comments.append(line)
        result.append((line, False, []))

    return result


def compute_brace_levels(lines_info):
    """
    Compute brace nesting level for each line.

    Braces inside comments and string/char literals are ignored. The braces
    of an ``extern "C" { ... }`` linkage block do not count as nesting: the
    opening brace is skipped and the level is clamped at zero so the closing
    brace cannot push it negative.

    Returns list of levels (level before processing the line).
    """
    levels = []
    level = 0
    linkage_pending = False  # saw `extern "C"` whose '{' is on a later line

    for line, is_code, _ in lines_info:
        levels.append(level)
        if not is_code:
            continue

        text = _scan_line(line, blank_literals=False)[0].strip()
        # Skip preprocessor (already filtered but double-check)
        if text.startswith('#'):
            continue

        delta = _brace_delta(line)
        linkage = _EXTERN_C_RE.match(text)
        if linkage:
            if linkage.group(1):
                delta -= 1
            else:
                linkage_pending = True
        elif linkage_pending:
            if text.startswith('{'):
                delta -= 1
            linkage_pending = False

        level = max(0, level + delta)

    return levels


def extract_identifier_before_paren(line):
    """
    Extract function name before first '('.

    Pointer stars glued to the name (``int *foo(``) are moved to the return
    type, matching what the space-separated form (``int * foo(``) yields.

    Returns (name, return_type) or (None, None) if invalid. The return type
    is 'void' when nothing precedes the name.
    """
    paren_pos = line.find('(')
    if paren_pos <= 0:
        return None, None

    # e.g., "static int * func_name" -> ["static", "int", "*", "func_name"]
    parts = line[:paren_pos].split()
    if not parts:
        return None, None

    declarator = parts[-1]
    name = declarator.strip('*').strip()
    if not name.isidentifier():
        return None, None
    if name in C_KEYWORDS or name in CONTROL_FLOW:
        return None, None

    stars = declarator[:len(declarator) - len(declarator.lstrip('*'))]
    ret_type = ' '.join(parts[:-1])
    if stars:
        ret_type = (ret_type + ' ' + stars).strip()
    return name, ret_type or 'void'


def find_matching_brace(lines_info, start_idx, start_level):
    """
    Find line index of matching closing brace.

    The lines from start_idx on are taken to be inside a block that was
    opened at nesting level start_level. Returns the index of the first code
    line at which the level drops back to start_level, or the last line
    index if the block is never closed.
    """
    level = start_level + 1
    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        level += _brace_delta(line)
        if level <= start_level:
            return i
    return len(lines_info) - 1


def _find_block_end(lines_info, open_idx):
    """Return the index of the line closing the block opened at/after open_idx.

    Unlike find_matching_brace this also scans the opening line itself, so a
    block that opens and closes on one line ends on that line.
    """
    depth = 0
    opened = False
    for i in range(open_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        code = _code_text(line)
        if '{' in code:
            opened = True
        depth += code.count('{') - code.count('}')
        if opened and depth <= 0:
            return i
    return len(lines_info) - 1


def extract_declarations(lines):
    """
    Extract all function, struct, and enum declarations at brace level 0.

    Returns (functions, structs, enums). Each struct/enum entry is a dict
    with keys 'name', 'pre', 'start', 'end', 'line_count'; function entries
    are the dicts produced by parse_function. Indices are 0-based.
    """
    functions = []
    structs = []
    enums = []

    lines_info = preprocess_lines(lines)
    brace_levels = compute_brace_levels(lines_info)

    # (line prefix, must contain '{' on the same line, parser, destination)
    handlers = (
        ('typedef struct', False, parse_typedef_struct, structs),
        ('typedef enum', False, parse_typedef_enum, enums),
        ('struct ', True, parse_simple_struct, structs),
        ('enum ', True, parse_simple_enum, enums),
    )

    i = 0
    n = len(lines_info)
    while i < n:
        line, is_code, pre_lines = lines_info[i]
        # Only process declarations at level 0
        if not is_code or brace_levels[i] != 0:
            i += 1
            continue

        stripped = line.strip()
        consumed = False
        for prefix, needs_brace, parser, bucket in handlers:
            if not stripped.startswith(prefix):
                continue
            if needs_brace and '{' not in stripped:
                continue
            name, decl_end = parser(lines_info, i)
            if name:
                bucket.append({
                    'name': name,
                    'pre': pre_lines,
                    'start': i,
                    'end': decl_end,
                    'line_count': decl_end - i + 1
                })
                i = decl_end + 1
                consumed = True
                break
        if consumed:
            continue

        if '(' in stripped:
            func_info = parse_function(lines_info, i)
            if func_info:
                functions.append(func_info)
                i = func_info['end'] + 1
                continue

        i += 1

    return functions, structs, enums


def _parse_typedef_block(lines_info, start_idx):
    """Shared parser for ``typedef struct|enum { ... } name;``.

    Scans code lines until the braces balance on a line ending in ';', then
    returns (name, end_index) where name is the text between the last '}'
    and the ';'. Returns (None, index) when no braces were involved, and
    (None, last_index) when the declaration never terminates.
    """
    parts = []
    depth = 0
    for i in range(start_idx, len(lines_info)):
        line, is_code, _ = lines_info[i]
        if not is_code:
            continue
        code = _code_text(line).strip()
        parts.append(code)
        depth += code.count('{') - code.count('}')

        if depth == 0 and code.endswith(';'):
            text = ' '.join(parts)
            close_brace = text.rfind('}')
            semi = text.find(';', close_brace)
            if close_brace > 0 and semi > close_brace:
                return text[close_brace + 1:semi].strip(), i
            return None, i

    return None, len(lines_info) - 1


def parse_typedef_struct(lines_info, start_idx):
    """Parse typedef struct { ... } name; -> (name, end_index)."""
    return _parse_typedef_block(lines_info, start_idx)


def parse_typedef_enum(lines_info, start_idx):
    """Parse typedef enum { ... } name; -> (name, end_index)."""
    return _parse_typedef_block(lines_info, start_idx)


def _parse_tagged_block(lines_info, start_idx, keyword):
    """Shared parser for ``struct name { ... };`` / ``enum name { ... };``.

    The tag and the opening brace must be on the start line. Returns
    (name, end_index), or (None, start_idx) when the line does not match.
    """
    stripped = lines_info[start_idx][0].strip()
    match = re.match(keyword + r'\s+(\w+)\s*\{', stripped)
    if not match:
        return None, start_idx
    return match.group(1), _find_block_end(lines_info, start_idx)


def parse_simple_struct(lines_info, start_idx):
    """Parse struct name { ... }; -> (name, end_index)."""
    return _parse_tagged_block(lines_info, start_idx, 'struct')


def parse_simple_enum(lines_info, start_idx):
    """Parse enum name { ... }; -> (name, end_index)."""
    return _parse_tagged_block(lines_info, start_idx, 'enum')


def parse_function(lines_info, start_idx):
    """Parse function definition at level 0.

    The opening brace must be on the start line or on one of the next three
    lines; a ';' seen first means the line is a prototype.

    Returns a dict with keys 'type', 'name', 'args', 'ret', 'pre', 'start',
    'end', 'decl_end' (line holding the opening brace) and 'line_count', or
    None when the line is not a function definition.
    """
    line, _, pre_lines = lines_info[start_idx]
    code = _code_text(line).strip()

    name, ret_type = extract_identifier_before_paren(code)
    if not name:
        return None

    # Forward declaration (ends with ;)
    if code.endswith(';'):
        return None

    decl_end = None
    # A parameter list cannot contain '{', so any brace after the first '('
    # on this line opens the body.
    if '{' in code[code.find('('):]:
        decl_end = start_idx
    else:
        for idx in range(start_idx + 1, min(start_idx + 4, len(lines_info))):
            next_line, next_is_code, _ = lines_info[idx]
            if not next_is_code:
                continue
            next_code = _code_text(next_line).strip()
            # A semicolon first means this was a prototype.
            if next_code.endswith(';'):
                return None
            if '{' in next_code:
                decl_end = idx
                break

    if decl_end is None:
        # No body - probably forward declaration or macro, skip
        return None

    closing = _find_block_end(lines_info, decl_end)

    # Collect the (possibly multi-line) signature up to the opening brace.
    sig_parts = [
        _scan_line(raw, blank_literals=False)[0]
        for raw, is_code, _ in lines_info[start_idx:decl_end + 1]
        if is_code
    ]
    sig_head = ' '.join(sig_parts).split('{', 1)[0]
    paren_start = sig_head.find('(')
    paren_end = sig_head.rfind(')')
    if paren_start > 0 and paren_end > paren_start:
        args = sig_head[paren_start + 1:paren_end].strip()
    else:
        args = ''

    return {
        'type': 'function',
        'name': name,
        'args': args,
        'ret': ret_type,
        'pre': pre_lines,
        'start': start_idx,
        'end': closing,
        'decl_end': decl_end,  # Store the line with opening brace
        'line_count': closing - start_idx + 1
    }


def line_checksum(line):
    """Calculate checksum: sum of all character codes modulo 256, as 2 hex digits."""
    return f"{sum(ord(c) for c in line) % 256:02X}"


def autolearn_indentation(lines, context_start=0, context_end=None):
    """
    Analyze indentation style in lines[context_start:context_end].

    Returns dict with:
    - indent_char: '\\t' or ' '
    - indent_size: number of spaces per level (for spaces) or 1 (for tabs)
    - indent_str: the string that indents by one level

    Tabs win only when strictly more lines are tab-indented than
    space-indented. The space width is the GCD of all observed leading
    widths, falling back to 4 when that is below 2 or nothing was observed.
    """
    if context_end is None:
        context_end = len(lines)

    tab_lines = 0
    space_lines = 0
    space_widths = []

    for line in lines[max(0, context_start):min(context_end, len(lines))]:
        if not line.strip():
            continue
        leading = line[:len(line) - len(line.lstrip())]
        if '\t' in leading:
            tab_lines += 1
        elif ' ' in leading:
            space_lines += 1
            space_widths.append(len(leading))

    if tab_lines > space_lines:
        indent_char = '\t'
        indent_size = 1
    else:
        indent_char = ' '
        indent_size = reduce(math.gcd, space_widths) if space_widths else 4
        if indent_size < 2:
            indent_size = 4

    return {
        'indent_char': indent_char,
        'indent_size': indent_size,
        'indent_str': indent_char * indent_size if indent_char == ' ' else '\t'
    }


def normalize_whitespace(text):
    """Collapse every run of whitespace to one space and strip both ends."""
    return re.sub(r'\s+', ' ', text).strip()


def _strip_comments(raw_lines):
    """Return raw_lines with // and /* */ comments removed, literals kept."""
    cleaned = []
    in_comment = False
    for raw in raw_lines:
        code, in_comment = _scan_line(raw, in_comment, blank_literals=False)
        cleaned.append(code)
    return cleaned


def get_function_signature(lines, func):
    """Get normalized function signature (like prototype).

    Joins the lines from func['start'] to func['decl_end'] (the line with
    the opening brace; defaults to the start line), removes comments, cuts
    at the first '{' and makes sure the result ends with ';'.
    """
    start = func['start']
    end = func.get('decl_end', func['start'])

    sig = normalize_whitespace(
        ' '.join(_strip_comments(lines[start:min(end + 1, len(lines))])))

    if '{' in sig:
        sig = sig[:sig.index('{')].strip()
    if not sig.endswith(';'):
        sig += ';'
    return sig


def get_declaration_text(lines, decl, include_comments=False):
    """Get normalized declaration text for struct/enum.

    With include_comments=False the declaration is returned on one line with
    comments removed. With include_comments=True every non-empty line is
    kept (comments included), whitespace-normalized and prefixed with one
    tab, joined by newlines.
    """
    body = lines[decl['start']:min(decl['end'] + 1, len(lines))]

    if include_comments:
        normalized = (normalize_whitespace(line) for line in body)
        return '\n'.join('\t' + text for text in normalized if text)

    return normalize_whitespace(' '.join(_strip_comments(body)))


def get_function_comments(pre_lines):
    """Extract function comments from pre_lines.

    Returns:
    - Last /* */ block if found (including single-line /* */)
    - Otherwise the trailing run of // lines (empty lines are skipped; any
      other line ends the run)
    - Empty string if no suitable comments
    """
    if not pre_lines:
        return ""

    last_block_comment = ""
    in_block = False
    block_start = -1

    for i, line in enumerate(pre_lines):
        stripped = line.strip()
        if stripped.startswith('/*'):
            if stripped.endswith('*/'):
                # Single-line block comment
                last_block_comment = line
            else:
                in_block = True
                block_start = i
        elif in_block and stripped.endswith('*/'):
            in_block = False
            last_block_comment = '\n'.join(pre_lines[block_start:i + 1])
        elif stripped.startswith('*/'):
            in_block = False

    if last_block_comment:
        return last_block_comment

    comment_lines = []
    for line in reversed(pre_lines):
        stripped = line.strip()
        if stripped.startswith('//'):
            comment_lines.append(stripped)
        elif stripped:
            break
    comment_lines.reverse()

    return '\n'.join(comment_lines)


def _print_numbered(lines, start, end):
    """Print lines[start..end] as '<line_no> <checksum>: <text>' (1-based)."""
    for idx in range(start, min(end + 1, len(lines))):
        print(f"{idx + 1} {line_checksum(lines[idx])}: {lines[idx]}")


def _print_file_header(path, is_first):
    """Print the 'path:' header, preceded by a blank line between files."""
    if not is_first:
        print()
    print(f"{path}:")


def _load_project():
    """Read filelist.txt, load the files and parse their declarations.

    Returns (files, contents, parsed) where parsed maps each loaded path to
    its (functions, structs, enums) triple. Exits with status 1 when the
    file list cannot be read.
    """
    try:
        files = read_filelist()
    except OSError as e:
        print(f"Error reading file list: {e}", file=sys.stderr)
        sys.exit(1)
    contents = load_files(files)
    parsed = {path: extract_declarations(lines)
              for path, lines in contents.items()}
    return files, contents, parsed


def _cmd_toc():
    """toc: one line per declaration, '[start-end] text', grouped by file."""
    files, contents, parsed = _load_project()
    first_file = True
    for path in files:
        if path not in contents:
            continue
        lines = contents[path]
        functions, structs, enums = parsed[path]

        entries = [(fn['start'], fn['end'], get_function_signature(lines, fn))
                   for fn in functions]
        entries += [(d['start'], d['end'], get_declaration_text(lines, d))
                    for d in structs + enums]
        if not entries:
            continue
        entries.sort(key=lambda entry: entry[0])

        _print_file_header(path, first_file)
        first_file = False
        for start, end, text in entries:
            print(f"[{start + 1}-{end + 1}] {text}")


def _cmd_description(names):
    """description: numbered source of the named items; functions are
    preceded by their doc comment (see get_function_comments)."""
    target_names = set(names)
    files, contents, parsed = _load_project()
    first_file = True
    for path in files:
        if path not in contents:
            continue
        functions, structs, enums = parsed[path]

        # (start, end, comment text to print first)
        entries = [(fn['start'], fn['end'], get_function_comments(fn['pre']))
                   for fn in functions if fn['name'] in target_names]
        entries += [(d['start'], d['end'], '')
                    for d in structs + enums if d['name'] in target_names]
        if not entries:
            continue
        entries.sort(key=lambda entry: entry[0])

        _print_file_header(path, first_file)
        first_file = False
        for start, end, comments in entries:
            if comments:
                print(comments)
            # NOTE(review): this prints the whole definition, same range as
            # `show`; kept as-is because callers may rely on the checksums.
            _print_numbered(contents[path], start, end)


def _cmd_show(names):
    """show: all preceding comment lines plus numbered source of the named
    items. Prints 'Not found' and exits with status 1 when nothing matches."""
    target_names = set(names)
    files, contents, parsed = _load_project()
    first_file = True
    found_any = False
    for path in files:
        if path not in contents:
            continue
        functions, structs, enums = parsed[path]
        items = [d for d in functions + structs + enums
                 if d['name'] in target_names]
        if not items:
            continue
        found_any = True
        items.sort(key=lambda item: item['start'])

        _print_file_header(path, first_file)
        first_file = False
        for item in items:
            for pre_line in item['pre']:
                print(pre_line)
            _print_numbered(contents[path], item['start'], item['end'])

    if not found_any:
        print("Not found")
        sys.exit(1)


def _reindent(new_code_lines, level, indent_info):
    """Re-indent replacement code by brace depth, starting at *level*.

    Blank lines stay empty, preprocessor directives go to column 0, a line
    starting with '}' is dedented before it is emitted and a line ending
    with '{' indents the lines after it. The level never drops below zero.
    """
    unit = indent_info['indent_str']
    result = []
    for raw in new_code_lines:
        stripped = raw.strip()
        if not stripped:
            result.append('')
            continue
        if stripped.startswith('#'):
            result.append(stripped)
            continue
        if stripped.startswith('}'):
            level = max(0, level - 1)
        result.append(unit * level + stripped)
        if stripped.endswith('{'):
            level += 1
    return result


def _cmd_edit(args):
    """edit: replace a checksum-verified block of lines with code from stdin.

    Format: c_util edit path/file.c start_line checksum1 [checksum2 ...] <<'EOF'
    One checksum is given per line to replace; every one must match the
    current file content or nothing is written.
    """
    if len(args) < 3:
        print("Usage: c_util edit <file> <start_line> <checksum1> [checksum2 ...]")
        print("Then provide new code block via stdin or heredoc")
        sys.exit(1)

    file_path = args[0]
    try:
        with open(file_path, 'r') as f:
            file_lines = _split_lines(f.read())
    except (OSError, UnicodeError) as e:
        print(f"Error reading file {file_path}: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        start_line = int(args[1])
    except ValueError:
        print(f"Invalid line number: {args[1]}")
        sys.exit(1)

    expected_checksums = []
    for checksum_str in args[2:]:
        try:
            expected_checksums.append(int(checksum_str, 16))
        except ValueError:
            print(f"Invalid checksum format: {checksum_str}")
            sys.exit(1)
    num_lines_to_replace = len(expected_checksums)

    if start_line < 1 or start_line > len(file_lines):
        print(f"Line number {start_line} out of range (1-{len(file_lines)})")
        sys.exit(1)
    if start_line + num_lines_to_replace - 1 > len(file_lines):
        print(f"Block extends beyond end of file")
        sys.exit(1)

    start_idx = start_line - 1
    end_idx = start_idx + num_lines_to_replace
    for line_idx, expected in zip(range(start_idx, end_idx), expected_checksums):
        actual = int(line_checksum(file_lines[line_idx]), 16)
        if actual != expected:
            print(f"Checksum mismatch at line {line_idx + 1}:")
            print(f" Expected: {expected:02X}")
            print(f" Actual: {actual:02X}")
            print(f" Line: {repr(file_lines[line_idx])}")
            sys.exit(1)

    print("Reading new code block from stdin...", file=sys.stderr)
    new_code_lines = _split_lines(sys.stdin.read())

    # Learn tabs-vs-spaces from the neighbourhood of the edit.
    indent_info = autolearn_indentation(
        file_lines,
        max(0, start_line - 10),
        min(len(file_lines), start_line + num_lines_to_replace + 10))

    # Nesting depth in front of the first replaced line, counted from the
    # top of the file so it does not depend on where a window happens to cut.
    brace_level = compute_brace_levels(preprocess_lines(file_lines))[start_idx]

    indented_new_lines = _reindent(new_code_lines, brace_level, indent_info)
    new_file_lines = file_lines[:start_idx] + indented_new_lines + file_lines[end_idx:]

    try:
        with open(file_path, 'w') as f:
            f.write(''.join(line + '\n' for line in new_file_lines))
    except OSError as e:
        print(f"Error writing file {file_path}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Successfully edited {file_path}")
    print(f"Replaced {num_lines_to_replace} lines with {len(indented_new_lines)} lines")


def main():
    """Command-line entry point; dispatches on sys.argv[1]."""
    if len(sys.argv) < 2:
        print("Usage: c_util toc | description <name1> [name2] ... | "
              "show <name1> [name2] ... | "
              "edit <file> <start_line> <checksum1> [checksum2 ...]")
        sys.exit(1)

    cmd = sys.argv[1]
    args = sys.argv[2:]

    if cmd == 'toc':
        _cmd_toc()
    elif cmd == 'description':
        if not args:
            print("Usage: c_util description <name1> [name2] ...")
            sys.exit(1)
        _cmd_description(args)
    elif cmd == 'show':
        if not args:
            print("Usage: c_util show <name1> [name2] ...")
            sys.exit(1)
        _cmd_show(args)
    elif cmd == 'edit':
        # Works on the named file only; filelist.txt is not required.
        _cmd_edit(args)
    else:
        print("Unknown command")
        sys.exit(1)


if __name__ == "__main__":
    main()