paulo@0: /* paulo@0: * $Id: gt_netorg.c,v 1.47 2005/01/04 15:00:51 mkern Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #include "gt_gnutella.h" paulo@0: paulo@0: #include "gt_node.h" paulo@0: #include "gt_node_list.h" paulo@0: #include "gt_netorg.h" paulo@0: paulo@0: #include "gt_connect.h" paulo@0: #include "gt_accept.h" paulo@0: paulo@0: #include "gt_packet.h" paulo@0: paulo@0: #include "gt_node_cache.h" paulo@0: #include "gt_web_cache.h" paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* how often we check the network's condition */ paulo@0: #define MAINTAIN_INTERVAL (10 * SECONDS) paulo@0: paulo@0: /* how often to check to disconnect idle nodes */ paulo@0: #define IDLE_DISCONNECT_INTERVAL (2 * MINUTES) paulo@0: paulo@0: /* how often to trim the node list */ paulo@0: #define CLEANUP_INTERVAL (15 * MINUTES) paulo@0: paulo@0: /* how often to clear indications of connecting to nodes */ paulo@0: #define RETRY_ALL_INTERVAL (60 * MINUTES) paulo@0: paulo@0: /* maximum number of unreplied pings before disconnecting from a node */ paulo@0: #define MAX_UNREPLIED_PINGS 10 paulo@0: paulo@0: /* how many connections attempts each maintain loop for nodes previously paulo@0: * registered */ paulo@0: #define TRY_CONNECT_NODE_LIST gt_config_get_int("connect/node_list=3") paulo@0: paulo@0: /* how many connection attempts for nodes in the pong cache */ paulo@0: #define TRY_CONNECT_NODE_CACHE gt_config_get_int("connect/node_cache=7") paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* timer for initiating/closing connections */ paulo@0: static timer_id maintain_timer; paulo@0: paulo@0: /* timer for disconnecting connections */ paulo@0: static timer_id disconnect_timer; paulo@0: paulo@0: /* timer for disconnecting idle nodes */ paulo@0: static timer_id idle_disconnect_timer; paulo@0: paulo@0: /* timer for cleaning up the node list */ paulo@0: static timer_id cleanup_timer; paulo@0: paulo@0: /* timer to clear 'tried' indicators to retry connecting */ paulo@0: static timer_id retry_all_timer; paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static GtNode *node_disconnect_one (TCPC *c, GtNode *node, void *udata) paulo@0: { paulo@0: GT->DBGFN (GT, "[%s]: disconnecting", net_ip_str (GT_NODE(c)->ip)); paulo@0: gt_node_disconnect (c); paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: static GtNode *node_ping (TCPC *c, GtNode *node, GtPacket *packet) paulo@0: { paulo@0: gt_packet_send (c, packet); paulo@0: paulo@0: /* ->pings_with_noreply gets set to zero when the node sends a pong */ paulo@0: if (gt_packet_ttl (packet) == 1) paulo@0: node->pings_with_noreply++; paulo@0: paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: static void ping_hosts_ttl (uint8_t ttl) paulo@0: { paulo@0: GtPacket *packet; paulo@0: paulo@0: if (!(packet = gt_packet_new (GT_MSG_PING, ttl, NULL))) paulo@0: return; paulo@0: paulo@0: gt_conn_foreach (GT_CONN_FOREACH(node_ping), packet, paulo@0: GT_NODE_NONE, GT_NODE_CONNECTED, 0); paulo@0: paulo@0: gt_packet_free (packet); paulo@0: } paulo@0: paulo@0: static void ping_hosts (time_t now) paulo@0: { paulo@0: static time_t last_ping; paulo@0: static time_t last_keep_alive; paulo@0: BOOL need_connections; paulo@0: uint8_t ttl; paulo@0: paulo@0: need_connections = gt_conn_need_connections (GT_NODE_ULTRA); paulo@0: paulo@0: if (now - last_ping < 30 * SECONDS && !need_connections) paulo@0: return; paulo@0: paulo@0: last_ping = now; paulo@0: paulo@0: /* ping to get more hosts if we need connections */ paulo@0: if (now - last_keep_alive >= 1 * MINUTES) paulo@0: { paulo@0: /* do a keepalive */ paulo@0: ttl = 1; paulo@0: last_keep_alive = now; paulo@0: } paulo@0: else paulo@0: { paulo@0: /* get more hosts */ paulo@0: ttl = 7; paulo@0: } paulo@0: paulo@0: ping_hosts_ttl (ttl); paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static void disconnect_no_query_route (void) paulo@0: { paulo@0: int nr_supernodes; paulo@0: paulo@0: /* only disconnect if theres other nodes to fallback on */ paulo@0: nr_supernodes = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED); paulo@0: paulo@0: if (nr_supernodes > 0) paulo@0: { paulo@0: gt_conn_foreach (node_disconnect_one, NULL, paulo@0: GT_NODE_LEAF, GT_NODE_CONNECTED, 0); paulo@0: } paulo@0: } paulo@0: paulo@0: static void report_connected_leaf (int connected) paulo@0: { paulo@0: static int last_connected = 0; paulo@0: paulo@0: if (connected != last_connected) paulo@0: { paulo@0: GT->DBGFN (GT, "connected=%d nodes=%d", connected, paulo@0: gt_conn_length (GT_NODE_NONE, GT_NODE_ANY)); paulo@0: last_connected = connected; paulo@0: } paulo@0: } paulo@0: paulo@0: static int get_need_as_ultra (gt_node_class_t klass) paulo@0: { paulo@0: switch (klass) paulo@0: { paulo@0: case GT_NODE_ULTRA: return GT_PEER_CONNECTIONS; paulo@0: case GT_NODE_LEAF: return GT_LEAF_CONNECTIONS; paulo@0: default: return 0; paulo@0: } paulo@0: } paulo@0: paulo@0: static int get_need_as_leaf (gt_node_class_t klass) paulo@0: { paulo@0: switch (klass) paulo@0: { paulo@0: case GT_NODE_ULTRA: return GT_SHIELDED_CONNECTIONS; paulo@0: case GT_NODE_LEAF: return 0; /* no leaf<->leaf connections allowed */ paulo@0: default: return 0; paulo@0: } paulo@0: } paulo@0: paulo@0: int gt_conn_need_connections (gt_node_class_t klass) paulo@0: { paulo@0: int connected; paulo@0: int desired; paulo@0: paulo@0: connected = gt_conn_length (klass, GT_NODE_CONNECTED); paulo@0: paulo@0: /* don't call this with multiple classes -- the need of one paulo@0: * class could cancel a surplus of the other */ paulo@0: assert (klass == GT_NODE_ULTRA || klass == GT_NODE_LEAF); paulo@0: paulo@0: if (GT_SELF->klass & GT_NODE_ULTRA) paulo@0: desired = get_need_as_ultra (klass); paulo@0: else paulo@0: desired = get_need_as_leaf (klass); paulo@0: paulo@0: return desired - connected; paulo@0: } paulo@0: paulo@0: static void disconnect_hosts (gt_node_class_t klass, int excess) paulo@0: { paulo@0: int connected; paulo@0: paulo@0: connected = gt_conn_length (klass, GT_NODE_CONNECTED); paulo@0: paulo@0: GT->DBGFN (GT, "too many connections (%d)[%s], disconnecting %d", paulo@0: connected, gt_node_class_str (klass), excess); paulo@0: paulo@0: while (excess-- > 0) paulo@0: { paulo@0: GtNode *node = gt_conn_random (klass, GT_NODE_CONNECTED); paulo@0: paulo@0: /* TODO: send BYE message here */ paulo@0: paulo@0: assert (GT_CONN(node) != NULL); paulo@0: gt_node_disconnect (GT_CONN(node)); paulo@0: } paulo@0: } paulo@0: paulo@0: static BOOL disconnect_excess_timer (void *udata) paulo@0: { paulo@0: int leaf_excess; paulo@0: int ultra_excess; paulo@0: paulo@0: leaf_excess = gt_conn_need_connections (GT_NODE_LEAF); paulo@0: ultra_excess = gt_conn_need_connections (GT_NODE_ULTRA); paulo@0: paulo@0: if (leaf_excess < 0) paulo@0: disconnect_hosts (GT_NODE_LEAF, -leaf_excess); paulo@0: paulo@0: if (ultra_excess < 0) paulo@0: disconnect_hosts (GT_NODE_ULTRA, -ultra_excess); paulo@0: paulo@0: disconnect_timer = 0; paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: static GtNode *collect_each_node (TCPC *c, GtNode *node, List **nodes) paulo@0: { paulo@0: if (node->tried_connect) paulo@0: return NULL; paulo@0: paulo@0: if (!node->gt_port) paulo@0: return NULL; paulo@0: paulo@0: /* mark having tried to to connect to this node already */ paulo@0: node->tried_connect = TRUE; paulo@0: paulo@0: *nodes = list_append (*nodes, node); paulo@0: paulo@0: /* stop iterating if we have enough nodes */ paulo@0: if (list_length (*nodes) >= TRY_CONNECT_NODE_LIST) paulo@0: return node; paulo@0: paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: static GtNode *clear_try_bit (TCPC *c, GtNode *node, void *udata) paulo@0: { paulo@0: node->tried_connect = FALSE; paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: static BOOL prune_registered (struct cached_node *cached, void *udata) paulo@0: { paulo@0: if (gt_node_lookup (cached->addr.ip, cached->addr.port)) paulo@0: { paulo@0: GT->DBGFN (GT, "pruning %s (already registered)", paulo@0: net_ip_str (cached->addr.ip), cached->addr.port); paulo@0: free (cached); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: static BOOL register_cached (struct cached_node *cached, void *udata) paulo@0: { paulo@0: GtNode *node; paulo@0: paulo@0: node = gt_node_lookup (cached->addr.ip, cached->addr.port); paulo@0: paulo@0: if (node) paulo@0: { paulo@0: /* paulo@0: * Argh, gt_node_lookup only matches by IP paulo@0: * This should be assert (0) paulo@0: */ paulo@0: assert (node->gt_port != cached->addr.port); paulo@0: paulo@0: free (cached); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: node = gt_node_register (cached->addr.ip, cached->addr.port, paulo@0: cached->klass); paulo@0: paulo@0: /* we've got to free the node, Jim */ paulo@0: free (cached); paulo@0: paulo@0: /* this happens if the address is invalid or a mem failure */ paulo@0: if (!node) paulo@0: return TRUE; paulo@0: paulo@0: gt_connect (node); paulo@0: node->tried_connect = TRUE; paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL connect_each (GtNode *node, void *udata) paulo@0: { paulo@0: if (gt_connect (node) < 0) paulo@0: { paulo@0: GT->err (GT, "Failed to connect to node %s:%hu: %s", paulo@0: net_ip_str (node->ip), node->gt_port, GIFT_NETERROR()); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* returns number of nodes we will try to connect to */ paulo@0: static size_t try_some_nodes (time_t now) paulo@0: { paulo@0: List *nodes = NULL; paulo@0: List *cached = NULL; paulo@0: size_t total = 0; paulo@0: size_t nr; paulo@0: size_t len; paulo@0: size_t count; paulo@0: paulo@0: /* the total amount of nodes we should try */ paulo@0: nr = TRY_CONNECT_NODE_LIST + TRY_CONNECT_NODE_CACHE; paulo@0: paulo@0: /* paulo@0: * Iterate the node (pong) cache and node list until we paulo@0: * have seen 'nr' nodes or there are no more hosts to try. paulo@0: */ paulo@0: paulo@0: while (total < nr) paulo@0: { paulo@0: gt_conn_foreach (GT_CONN_FOREACH(collect_each_node), &nodes, paulo@0: GT_NODE_NONE, GT_NODE_DISCONNECTED, 0); paulo@0: paulo@0: /* grab at most nr - total nodes (still need to fix the preceeding paulo@0: * call to gt_conn_foreach() to respect 'total') */ paulo@0: count = MIN (nr - total, TRY_CONNECT_NODE_CACHE); paulo@0: assert (count >= 0); paulo@0: paulo@0: cached = gt_node_cache_get_remove (count); paulo@0: paulo@0: /* registered nodes can still slip into our node cache, argh */ paulo@0: cached = list_foreach_remove (cached, paulo@0: (ListForeachFunc)prune_registered, paulo@0: NULL); paulo@0: paulo@0: len = list_length (nodes) + list_length (cached); paulo@0: paulo@0: total += len; paulo@0: paulo@0: if (len == 0) paulo@0: break; paulo@0: paulo@0: nodes = list_foreach_remove (nodes, (ListForeachFunc)connect_each, paulo@0: NULL); paulo@0: assert (nodes == NULL); paulo@0: paulo@0: cached = list_foreach_remove (cached, (ListForeachFunc)register_cached, paulo@0: NULL); paulo@0: assert (cached == NULL); paulo@0: } paulo@0: paulo@0: return total; paulo@0: } paulo@0: paulo@0: static void maintain_class (gt_node_class_t klass, time_t now) paulo@0: { paulo@0: int connected; paulo@0: int need; paulo@0: paulo@0: connected = gt_conn_length (klass, GT_NODE_CONNECTED); paulo@0: need = gt_conn_need_connections (klass); paulo@0: paulo@0: /* paulo@0: * print the number of nodes connected if it has changed paulo@0: * XXX: print leaves from ultrapeers and leaves too. paulo@0: * damn static variables to hell paulo@0: */ paulo@0: if (klass == GT_NODE_ULTRA) paulo@0: report_connected_leaf (connected); paulo@0: paulo@0: /* 0 == perfection */ paulo@0: if (need == 0) paulo@0: return; paulo@0: paulo@0: /* disconnect some nodes */ paulo@0: if (need < 0) paulo@0: { paulo@0: if (disconnect_timer) paulo@0: return; paulo@0: paulo@0: /* paulo@0: * Disconnect the node soon, because it could happen that paulo@0: * someone will disconnect from us first, causing cascading paulo@0: * disconnects. paulo@0: */ paulo@0: GT->DBGFN (GT, "starting disconnect timer..."); paulo@0: disconnect_timer = timer_add (4 * SECONDS, paulo@0: (TimerCallback)disconnect_excess_timer, paulo@0: NULL); paulo@0: return; paulo@0: } paulo@0: paulo@0: /* paulo@0: * If try_some_nodes() returns 0, then there are no nodes in the node paulo@0: * cache nor any on the node list that we haven't tried yet. In that case, paulo@0: * we need to contact the gwebcaches and hope a fresh infusion of nodes paulo@0: * will help. While we wait, we retry all the nodes we already tried by paulo@0: * clearing node->tried_connect for each node, which otherwise prevents paulo@0: * from recontacting the nodes. paulo@0: * paulo@0: * We will "block" on the gwebcaches if the bandwidth is completely paulo@0: * saturated and we can't get a reply from anyone, or if there are no paulo@0: * ultrapeers with connection slots available. The gwebcache subsystem paulo@0: * imposes its own limits on how often it will contact gwebcaches, so if paulo@0: * we do end up in this situation, hopefully we will simply spend most of paulo@0: * the time unconnected rather than hammering the gwebcaches. paulo@0: */ paulo@0: if (try_some_nodes (now) == 0) paulo@0: { paulo@0: size_t len; paulo@0: paulo@0: len = gt_conn_length (GT_NODE_NONE, GT_NODE_ANY); paulo@0: GT->dbg (GT, "try_some_nodes() returned 0. node list len=%u", len); paulo@0: paulo@0: if (connected == 0 || len < 20) paulo@0: { paulo@0: /* try to get more hosts */ paulo@0: GT->dbg (GT, "No hosts to try. Looking in gwebcaches..."); paulo@0: gt_web_cache_update (); paulo@0: } paulo@0: paulo@0: GT->dbg (GT, "Retrying to connect to nodes..."); paulo@0: paulo@0: /* while we are waiting for the gwebcaches, try each node again */ paulo@0: gt_conn_foreach (GT_CONN_FOREACH(clear_try_bit), NULL, paulo@0: GT_NODE_NONE, GT_NODE_ANY, 0); paulo@0: paulo@0: return; paulo@0: } paulo@0: } paulo@0: paulo@0: static GtNode *disconnect_no_ping_replies (TCPC *c, GtNode *node, void *udata) paulo@0: { paulo@0: if (node->pings_with_noreply < MAX_UNREPLIED_PINGS) paulo@0: return NULL; paulo@0: paulo@0: GT->DBGSOCK (GT, node->c, "%d unreplied pings. disconnecting", paulo@0: node->pings_with_noreply); paulo@0: paulo@0: gt_node_disconnect (c); paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* paulo@0: * This is the main network maintainence function. All connections to the paulo@0: * network are initiated from here. paulo@0: */ paulo@0: static BOOL maintain (void *udata) paulo@0: { paulo@0: time_t now; paulo@0: paulo@0: now = time (NULL); paulo@0: paulo@0: /* disconnect nodes without query routing if we are not a supernode */ paulo@0: if (!(GT_SELF->klass & GT_NODE_ULTRA)) paulo@0: disconnect_no_query_route (); paulo@0: paulo@0: #if 0 paulo@0: trace_list (connections); paulo@0: #endif paulo@0: paulo@0: /* paulo@0: * Send pings to all connected nodes. We used to do this only every paulo@0: * minute, but because some nodes have short timeouts if they receive paulo@0: * nothing from you, we now do it every MAINTAIN_INTERVAL. paulo@0: */ paulo@0: ping_hosts (now); paulo@0: paulo@0: maintain_class (GT_NODE_ULTRA, now); paulo@0: maintain_class (GT_NODE_LEAF, now); paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL idle_disconnect (void *udata) paulo@0: { paulo@0: gt_conn_foreach (GT_CONN_FOREACH(disconnect_no_ping_replies), NULL, paulo@0: GT_NODE_NONE, GT_NODE_CONNECTED, 0); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL cleanup (void *udata) paulo@0: { paulo@0: /* trim excess nodes */ paulo@0: gt_conn_trim (); paulo@0: paulo@0: /* save to disk important nodes from the node list */ paulo@0: gt_node_list_save (); paulo@0: paulo@0: /* save to disk important nodes from the node cache */ paulo@0: gt_node_cache_save (); paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL retry_all (void *udata) paulo@0: { paulo@0: /* paulo@0: * Clear the 'tried' bit for all nodes, so if we start looking for nodes paulo@0: * we try reconnecting to the ones we know about instead of contacting the paulo@0: * gwebcaches. paulo@0: * paulo@0: * NOTE: should all the nodes be possibly retried (GT_NODE_ANY) or paulo@0: * only those that are disconnected (GT_NODE_DISCONNECTED)? paulo@0: */ paulo@0: gt_conn_foreach (GT_CONN_FOREACH(clear_try_bit), NULL, paulo@0: GT_NODE_NONE, GT_NODE_ANY, 0); paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: /*****************************************************************************/ paulo@0: paulo@0: void gt_netorg_init (void) paulo@0: { paulo@0: if (maintain_timer != 0) paulo@0: return; paulo@0: paulo@0: /* load the node cache */ paulo@0: gt_node_cache_init (); paulo@0: paulo@0: /* setup the links maintain timer */ paulo@0: maintain_timer = timer_add (MAINTAIN_INTERVAL, paulo@0: maintain, NULL); paulo@0: paulo@0: idle_disconnect_timer = timer_add (IDLE_DISCONNECT_INTERVAL, paulo@0: idle_disconnect, NULL); paulo@0: paulo@0: cleanup_timer = timer_add (CLEANUP_INTERVAL, paulo@0: cleanup, NULL); paulo@0: paulo@0: retry_all_timer = timer_add (RETRY_ALL_INTERVAL, paulo@0: retry_all, NULL); paulo@0: paulo@0: /* call it now so we don't have to wait the first time */ paulo@0: maintain (NULL); paulo@0: } paulo@0: paulo@0: void gt_netorg_cleanup (void) paulo@0: { paulo@0: /* save the node cache */ paulo@0: gt_node_cache_cleanup (); paulo@0: paulo@0: timer_remove_zero (&disconnect_timer); paulo@0: paulo@0: timer_remove_zero (&maintain_timer); paulo@0: timer_remove_zero (&idle_disconnect_timer); paulo@0: timer_remove_zero (&cleanup_timer); paulo@0: timer_remove_zero (&retry_all_timer); paulo@0: }