paulo@0: /* paulo@0: * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #include "gt_gnutella.h" paulo@0: paulo@0: #include "gt_node.h" paulo@0: #include "gt_node_list.h" paulo@0: paulo@0: #include "gt_search.h" paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: struct gt_stats paulo@0: { paulo@0: double size_kb; paulo@0: unsigned long files; paulo@0: size_t users; paulo@0: }; paulo@0: paulo@0: /* number of stats samples to accumulate */ paulo@0: #define NR_SAMPLES 64 paulo@0: paulo@0: /* number of samples to take surrounding the median */ paulo@0: #define NR_MEDIAN_SAMPLES 2 paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* paulo@0: * This should be computed from our neighboring peers, but for now it paulo@0: * hardcodes the max leaves for the most numerous nodes on the network, paulo@0: * Limewire (32) and BearShare (10) paulo@0: */ paulo@0: static int avg_leaves = (32 + 10) / 2; paulo@0: paulo@0: /* default ttl we use in searches */ paulo@0: static int default_ttl = GT_SEARCH_TTL; paulo@0: paulo@0: /* default outdegree for most nodes */ paulo@0: static int default_degree = 6; paulo@0: paulo@0: /* keep track of the stats for the last NR_SAMPLES pongs */ paulo@0: static struct gt_stats samples[NR_SAMPLES]; paulo@0: static size_t samples_index; paulo@0: static size_t samples_count; paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: void gt_stats_accumulate (in_addr_t ipv4, in_port_t port, paulo@0: in_addr_t src_ip, uint32_t files, paulo@0: uint32_t size_kb) paulo@0: { paulo@0: samples[samples_index].files = files; paulo@0: samples[samples_index].size_kb = size_kb; paulo@0: paulo@0: samples_index = (samples_index + 1) % NR_SAMPLES; paulo@0: samples_count++; paulo@0: paulo@0: if (samples_count > NR_SAMPLES) paulo@0: samples_count = NR_SAMPLES; paulo@0: } paulo@0: paulo@0: static int stats_cmp (const void *a, const void *b) paulo@0: { paulo@0: const struct gt_stats *a_s = a; paulo@0: const struct gt_stats *b_s = b; paulo@0: paulo@0: return a_s->size_kb - b_s->size_kb; paulo@0: } paulo@0: paulo@0: static void clear_stats (struct gt_stats *stats) paulo@0: { paulo@0: stats->size_kb = 0.0; paulo@0: stats->files = 0; paulo@0: stats->users = 0; paulo@0: } paulo@0: paulo@0: static void get_median_stats (struct gt_stats *pong_stats, size_t nr) paulo@0: { paulo@0: int low, high; paulo@0: int mid; paulo@0: int i; paulo@0: paulo@0: if (nr == 0) paulo@0: return; paulo@0: paulo@0: mid = nr / 2; paulo@0: paulo@0: low = mid - NR_MEDIAN_SAMPLES; paulo@0: high = mid + NR_MEDIAN_SAMPLES; paulo@0: paulo@0: if (low < 0) paulo@0: low = 0; paulo@0: paulo@0: if (high > nr - 1) paulo@0: high = nr - 1; paulo@0: paulo@0: for (i = low; i <= high; i++) paulo@0: { paulo@0: pong_stats->size_kb += samples[i].size_kb; paulo@0: pong_stats->files += samples[i].files; paulo@0: pong_stats->users++; paulo@0: } paulo@0: } paulo@0: paulo@0: static void get_pong_stats (struct gt_stats *pong_stats) paulo@0: { paulo@0: qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp); paulo@0: paulo@0: clear_stats (pong_stats); paulo@0: get_median_stats (pong_stats, samples_count); paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st) paulo@0: { paulo@0: /* sanity check */ paulo@0: if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1) paulo@0: return NULL; paulo@0: paulo@0: st->size_kb += node->size_kb; paulo@0: st->files += node->files; paulo@0: paulo@0: if (node->vitality > 0) paulo@0: st->users++; paulo@0: paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: static void get_conn_stats (struct gt_stats *conn_stats) paulo@0: { paulo@0: clear_stats (conn_stats); paulo@0: paulo@0: /* grab statistics from the nodes structures */ paulo@0: gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats, paulo@0: GT_NODE_NONE, GT_NODE_ANY, 0); paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* no need to pull in math.h for this simple thing: exponent is small */ paulo@0: static unsigned long int_pow (int base, int exponent) paulo@0: { paulo@0: unsigned long total = 1; paulo@0: paulo@0: while (exponent-- > 0) paulo@0: total *= base; paulo@0: paulo@0: return total; paulo@0: } paulo@0: paulo@0: static unsigned long sum_network (int degree, int ttl) paulo@0: { paulo@0: int i; paulo@0: unsigned long sum; paulo@0: paulo@0: if (ttl <= 0) paulo@0: return 0; paulo@0: paulo@0: sum = degree; paulo@0: paulo@0: for (i = 2; i <= ttl; i++) paulo@0: sum += int_pow (degree-1, i-1) * degree; paulo@0: paulo@0: return sum; paulo@0: } paulo@0: paulo@0: /* paulo@0: * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL' paulo@0: * handshake headers to use for degree and ttl in the calculations. paulo@0: */ paulo@0: static GtNode *count_edges (TCPC *c, GtNode *node, void *udata) paulo@0: { paulo@0: unsigned long *count = udata; paulo@0: unsigned long degree = 0; paulo@0: unsigned long ttl = 0; paulo@0: char *max_ttl_str; paulo@0: char *x_degree_str; paulo@0: paulo@0: if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL) paulo@0: ttl = ATOUL (max_ttl_str); paulo@0: paulo@0: if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL) paulo@0: degree = ATOUL (x_degree_str); paulo@0: paulo@0: if (degree == 0 || degree > 200) paulo@0: degree = default_degree; paulo@0: paulo@0: if (ttl == 0 || ttl > 30) paulo@0: ttl = default_ttl; paulo@0: paulo@0: /* workaround limewire misfeature where it appears X-Max-TTL is set paulo@0: * too high */ paulo@0: if (degree > 30 && ttl > 5) paulo@0: ttl = default_ttl; paulo@0: paulo@0: *count += sum_network (degree, ttl); paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: /* paulo@0: * Of course this is totally inaccurate, and not even dynamic. It paulo@0: * approximates the maximum number of users in a network of paulo@0: * Gnutella's structure in a given outdegree and TTL range paulo@0: * (default_degree, default_ttl). This doesn't represent the whole paulo@0: * network, and since the network has a significant amount of paulo@0: * cycles this calculation is likely way too much. paulo@0: * paulo@0: * To compensate, we divide by 3. We could make a better guess paulo@0: * about the number of nodes if we could find the number of cycles paulo@0: * some way, but this information is not readily available. paulo@0: */ paulo@0: static unsigned long guess_users (void) paulo@0: { paulo@0: unsigned long users = 0; paulo@0: paulo@0: gt_conn_foreach (count_edges, &users, paulo@0: GT_NODE_ULTRA, GT_NODE_CONNECTED, 0); paulo@0: paulo@0: /* multiply by the number of leaves */ paulo@0: users *= avg_leaves; paulo@0: paulo@0: /* divide by average redundancy level. TODO: calculate this dynamically */ paulo@0: users /= ((3 + 1) / 2); paulo@0: paulo@0: /* divide by 3 to account for cycles */ paulo@0: users /= 3; paulo@0: paulo@0: /* multiply by 2 because, well...this whole thing is misleading anyway, paulo@0: * and the total number of users is greater than the horizon */ paulo@0: users *= 2; paulo@0: paulo@0: return users; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* paulo@0: * TODO: accumulate statistics on average leaves/cycles on our immediate paulo@0: * peers. paulo@0: */ paulo@0: int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files, paulo@0: double *size, Dataset **extra) paulo@0: { paulo@0: struct gt_stats pong_stats; paulo@0: struct gt_stats conn_stats; paulo@0: struct gt_stats avg_stats; paulo@0: size_t connected; paulo@0: paulo@0: *files = *users = *size = 0; paulo@0: paulo@0: connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED); paulo@0: paulo@0: if (connected == 0) paulo@0: return 0; paulo@0: paulo@0: get_pong_stats (&pong_stats); paulo@0: get_conn_stats (&conn_stats); paulo@0: paulo@0: if (conn_stats.users == 0) paulo@0: conn_stats.users = 1; paulo@0: paulo@0: if (pong_stats.users == 0) paulo@0: pong_stats.users = 1; paulo@0: paulo@0: /* divide the total files size by two since it is inflated by ultrapeers paulo@0: * abusing it to communicate their ultrapeer-ness to other nodes */ paulo@0: pong_stats.size_kb /= 2; paulo@0: conn_stats.size_kb /= 2; paulo@0: paulo@0: /* find the average of the data for our two sources */ paulo@0: pong_stats.size_kb /= pong_stats.users; paulo@0: pong_stats.files /= pong_stats.users; paulo@0: conn_stats.size_kb /= conn_stats.users; paulo@0: conn_stats.files /= conn_stats.users; paulo@0: paulo@0: /* put the stats of previously connected nodes on equal footing with the paulo@0: * stats collected from pongs -- they should be more reliable */ paulo@0: avg_stats.files = (pong_stats.files + conn_stats.files) / 2; paulo@0: avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2; paulo@0: paulo@0: /* add conn_stats.users for a little variation (not much) */ paulo@0: avg_stats.users = guess_users () + conn_stats.users; paulo@0: paulo@0: *users = avg_stats.users; paulo@0: *files = avg_stats.files * avg_stats.users; paulo@0: *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024; paulo@0: paulo@0: return connected; paulo@0: }