Browse Source

+bgp diagnostics

nodeinfo-routing-update
jeka 2 weeks ago
parent
commit
3b3b411c70
  1. 6
      src/etcp.c
  2. 21
      src/route_bgp.c
  3. 57
      src/route_lib.c

6
src/etcp.c

@ -176,10 +176,8 @@ void etcp_connection_close(struct ETCP_CONN* etcp) {
routing_del_conn(etcp);
// Notify BGP about connection closure (send withdraws, remove from senders_list)
if (etcp->instance && etcp->instance->bgp) {
route_bgp_remove_conn(etcp);
}
// NOTE: route_bgp_remove_conn is already called via down_cbk in etcp_on_down() above
// DO NOT call it again here to avoid double-processing and ref_count corruption
// Deinitialize packet normalizer (this will call routing_del_conn)
if (etcp->normalizer) {

21
src/route_bgp.c

@ -452,9 +452,26 @@ void route_bgp_remove_conn(struct ETCP_CONN* conn) {
return;
}
// SAFETY: проверяем что conn ещё есть в senders_list
struct ROUTE_BGP* bgp = conn->instance->bgp;
bool found_in_list = false;
struct ll_entry* e = bgp->senders_list->head;
while (e) {
struct ROUTE_BGP_CONN_ITEM* item = (struct ROUTE_BGP_CONN_ITEM*)e->data;
if (item->conn == conn) {
found_in_list = true;
break;
}
e = e->next;
}
if (!found_in_list) {
DEBUG_WARN(DEBUG_CATEGORY_BGP, "route_bgp_remove_conn: conn already removed or not in list, skipping");
return;
}
DEBUG_INFO(DEBUG_CATEGORY_BGP, "route_bgp_remove_conn: peer=%016llx", (unsigned long long)conn->peer_node_id);
struct ROUTE_BGP* bgp = conn->instance->bgp;
struct ROUTE_TABLE* rt = conn->instance->rt;
// ← КЛЮЧЕВОЕ ИЗМЕНЕНИЕ: теперь используем route_remove_conn
@ -463,7 +480,7 @@ void route_bgp_remove_conn(struct ETCP_CONN* conn) {
}
// Удаляем из списка рассылки
struct ll_entry* e = bgp->senders_list->head;
e = bgp->senders_list->head;
while (e) {
struct ROUTE_BGP_CONN_ITEM* item = (struct ROUTE_BGP_CONN_ITEM*)e->data;
if (item->conn == conn) {

57
src/route_lib.c

@ -517,20 +517,43 @@ void route_remove_conn(struct ROUTE_TABLE *table, struct ETCP_CONN *conn) {
DEBUG_ERROR(DEBUG_CATEGORY_ROUTING, "route_remove_conn: conn is NULL");
return;
}
DEBUG_TRACE(DEBUG_CATEGORY_ROUTING, "route_remove_conn: conn=%p", (void*)conn);
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: conn=%p peer=%016llx table_count=%zu",
(void*)conn, (unsigned long long)conn->peer_node_id, table->count);
// DIAGNOSTIC: поиск conn во всех entries перед началом
size_t entries_with_conn = 0;
for (size_t i = 0; i < table->count; i++) {
struct NODE_CONNS_INFO *info = table->entries[i].conn_list;
if (!info) continue;
for (uint8_t j = 0; j < info->conninfo_count; j++) {
if (info->conn_info[j].conn_id == conn) {
entries_with_conn++;
DEBUG_TRACE(DEBUG_CATEGORY_ROUTING, " pre_check: entry[%zu] node_id=%016llx conninfo_count=%d has_conn_at_index=%d",
i, (unsigned long long)info->node_id, info->conninfo_count, j);
}
}
}
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: pre_check found conn in %zu entries", entries_with_conn);
if (entries_with_conn == 0) {
DEBUG_WARN(DEBUG_CATEGORY_ROUTING, "route_remove_conn: conn NOT FOUND in any entry - skipping");
return;
}
struct NODE_CONNS_INFO* affected[512];
bool needs_reroute[512] = {0};
size_t aff_count = 0;
// Pass 1: удаляем conn (один раз на node_id)
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: Pass1 - removing conn from NODE_CONNS_INFO");
for (size_t i = 0; i < table->count; i++) {
struct NODE_CONNS_INFO *info = table->entries[i].conn_list;
if (!info) continue;
bool has_conn = false;
uint8_t conn_idx = 0;
for (uint8_t j = 0; j < info->conninfo_count; j++) {
if (info->conn_info[j].conn_id == conn) { has_conn = true; break; }
if (info->conn_info[j].conn_id == conn) { has_conn = true; conn_idx = j; break; }
}
if (!has_conn) continue;
@ -538,41 +561,64 @@ void route_remove_conn(struct ROUTE_TABLE *table, struct ETCP_CONN *conn) {
for (size_t k = 0; k < aff_count; k++) {
if (affected[k] == info) { already = true; break; }
}
if (already) continue;
if (already) {
DEBUG_TRACE(DEBUG_CATEGORY_ROUTING, " entry[%zu] node_id=%016llx: already processed", i, (unsigned long long)info->node_id);
continue;
}
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " entry[%zu] node_id=%016llx: removing conn at index=%d, conninfo_count was=%d ref_count=%d",
i, (unsigned long long)info->node_id, conn_idx, info->conninfo_count, info->ref_count);
bool rer = node_conns_info_remove(info, conn);
affected[aff_count] = info;
needs_reroute[aff_count] = rer;
aff_count++;
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " after remove: conninfo_count=%d ref_count=%d was_preferred=%s",
info->conninfo_count, info->ref_count, rer ? "yes" : "no");
if (aff_count >= 512) break;
}
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: Pass1 complete - affected %zu NODE_CONNS_INFO", aff_count);
// Pass 2: удаляем записи без оставшихся подключений
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: Pass2 - compacting entries");
size_t j = 0;
size_t entries_removed = 0;
for (size_t i = 0; i < table->count; i++) {
struct ROUTE_ENTRY *e = &table->entries[i];
if (e->conn_list && e->conn_list->conninfo_count == 0) {
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " removing entry[%zu] network=%s/%d node_id=%016llx ref_count=%d",
i, ip_to_string(e->network).a, e->prefix_length,
(unsigned long long)e->conn_list->node_id, e->conn_list->ref_count);
if (table->change_callback)
table->change_callback(table, e, 2, 0, table->change_callback_arg);
struct NODE_CONNS_INFO *info = e->conn_list;
e->conn_list = NULL;
if (--info->ref_count == 0)
if (--info->ref_count == 0) {
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " destroying NODE_CONNS_INFO (ref_count=0)");
node_conns_info_destroy(info);
} else {
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " NODE_CONNS_INFO ref_count now=%d (still referenced)", info->ref_count);
}
entries_removed++;
continue;
}
if (i != j) table->entries[j] = table->entries[i];
j++;
}
table->count = j;
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: Pass2 complete - removed %zu entries, new_count=%zu", entries_removed, table->count);
// Pass 3: reroute только если изменился preferred_conn
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: Pass3 - reroute check");
for (size_t i = 0; i < table->count; i++) {
struct ROUTE_ENTRY *e = &table->entries[i];
if (!e->conn_list) continue;
for (size_t k = 0; k < aff_count; k++) {
if (affected[k] == e->conn_list && needs_reroute[k]) {
e->last_update = get_time_tb();
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, " entry[%zu] network=%s/%d: triggering REROUTE callback",
i, ip_to_string(e->network).a, e->prefix_length);
if (table->change_callback)
table->change_callback(table, e, 1, 0, table->change_callback_arg);
break;
@ -580,8 +626,7 @@ void route_remove_conn(struct ROUTE_TABLE *table, struct ETCP_CONN *conn) {
}
}
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: processed %zu nodes", aff_count);
DEBUG_TRACE(DEBUG_CATEGORY_ROUTING, "route_remove_conn: DONE");
DEBUG_INFO(DEBUG_CATEGORY_ROUTING, "route_remove_conn: DONE - affected %zu nodes, final_count=%zu", aff_count, table->count);
}
/* ====================== НОВАЯ ФУНКЦИЯ ДЛЯ WITHDRAW ====================== */

Loading…
Cancel
Save