Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
diff src/gt_stats.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/gt_stats.c Sat Feb 20 21:18:28 2010 -0800 1.3 @@ -0,0 +1,303 @@ 1.4 +/* 1.5 + * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $ 1.6 + * 1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) 1.8 + * 1.9 + * This program is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License as published by the 1.11 + * Free Software Foundation; either version 2, or (at your option) any 1.12 + * later version. 1.13 + * 1.14 + * This program is distributed in the hope that it will be useful, but 1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.17 + * General Public License for more details. 1.18 + */ 1.19 + 1.20 +#include "gt_gnutella.h" 1.21 + 1.22 +#include "gt_node.h" 1.23 +#include "gt_node_list.h" 1.24 + 1.25 +#include "gt_search.h" 1.26 + 1.27 +/*****************************************************************************/ 1.28 + 1.29 +struct gt_stats 1.30 +{ 1.31 + double size_kb; 1.32 + unsigned long files; 1.33 + size_t users; 1.34 +}; 1.35 + 1.36 +/* number of stats samples to accumulate */ 1.37 +#define NR_SAMPLES 64 1.38 + 1.39 +/* number of samples to take surrounding the median */ 1.40 +#define NR_MEDIAN_SAMPLES 2 1.41 + 1.42 +/*****************************************************************************/ 1.43 + 1.44 +/* 1.45 + * This should be computed from our neighboring peers, but for now it 1.46 + * hardcodes the max leaves for the most numerous nodes on the network, 1.47 + * Limewire (32) and BearShare (10) 1.48 + */ 1.49 +static int avg_leaves = (32 + 10) / 2; 1.50 + 1.51 +/* default ttl we use in searches */ 1.52 +static int default_ttl = GT_SEARCH_TTL; 1.53 + 1.54 +/* default outdegree for most nodes */ 1.55 +static int default_degree = 6; 1.56 + 1.57 +/* keep track of the stats for the last NR_SAMPLES pongs */ 1.58 +static struct gt_stats samples[NR_SAMPLES]; 1.59 +static size_t samples_index; 1.60 +static size_t samples_count; 1.61 + 1.62 +/*****************************************************************************/ 1.63 + 1.64 +void gt_stats_accumulate (in_addr_t ipv4, in_port_t port, 1.65 + in_addr_t src_ip, uint32_t files, 1.66 + uint32_t size_kb) 1.67 +{ 1.68 + samples[samples_index].files = files; 1.69 + samples[samples_index].size_kb = size_kb; 1.70 + 1.71 + samples_index = (samples_index + 1) % NR_SAMPLES; 1.72 + samples_count++; 1.73 + 1.74 + if (samples_count > NR_SAMPLES) 1.75 + samples_count = NR_SAMPLES; 1.76 +} 1.77 + 1.78 +static int stats_cmp (const void *a, const void *b) 1.79 +{ 1.80 + const struct gt_stats *a_s = a; 1.81 + const struct gt_stats *b_s = b; 1.82 + 1.83 + return a_s->size_kb - b_s->size_kb; 1.84 +} 1.85 + 1.86 +static void clear_stats (struct gt_stats *stats) 1.87 +{ 1.88 + stats->size_kb = 0.0; 1.89 + stats->files = 0; 1.90 + stats->users = 0; 1.91 +} 1.92 + 1.93 +static void get_median_stats (struct gt_stats *pong_stats, size_t nr) 1.94 +{ 1.95 + int low, high; 1.96 + int mid; 1.97 + int i; 1.98 + 1.99 + if (nr == 0) 1.100 + return; 1.101 + 1.102 + mid = nr / 2; 1.103 + 1.104 + low = mid - NR_MEDIAN_SAMPLES; 1.105 + high = mid + NR_MEDIAN_SAMPLES; 1.106 + 1.107 + if (low < 0) 1.108 + low = 0; 1.109 + 1.110 + if (high > nr - 1) 1.111 + high = nr - 1; 1.112 + 1.113 + for (i = low; i <= high; i++) 1.114 + { 1.115 + pong_stats->size_kb += samples[i].size_kb; 1.116 + pong_stats->files += samples[i].files; 1.117 + pong_stats->users++; 1.118 + } 1.119 +} 1.120 + 1.121 +static void get_pong_stats (struct gt_stats *pong_stats) 1.122 +{ 1.123 + qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp); 1.124 + 1.125 + clear_stats (pong_stats); 1.126 + get_median_stats (pong_stats, samples_count); 1.127 +} 1.128 + 1.129 +/*****************************************************************************/ 1.130 + 1.131 +static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st) 1.132 +{ 1.133 + /* sanity check */ 1.134 + if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1) 1.135 + return NULL; 1.136 + 1.137 + st->size_kb += node->size_kb; 1.138 + st->files += node->files; 1.139 + 1.140 + if (node->vitality > 0) 1.141 + st->users++; 1.142 + 1.143 + return NULL; 1.144 +} 1.145 + 1.146 +static void get_conn_stats (struct gt_stats *conn_stats) 1.147 +{ 1.148 + clear_stats (conn_stats); 1.149 + 1.150 + /* grab statistics from the nodes structures */ 1.151 + gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats, 1.152 + GT_NODE_NONE, GT_NODE_ANY, 0); 1.153 +} 1.154 + 1.155 +/*****************************************************************************/ 1.156 + 1.157 +/* no need to pull in math.h for this simple thing: exponent is small */ 1.158 +static unsigned long int_pow (int base, int exponent) 1.159 +{ 1.160 + unsigned long total = 1; 1.161 + 1.162 + while (exponent-- > 0) 1.163 + total *= base; 1.164 + 1.165 + return total; 1.166 +} 1.167 + 1.168 +static unsigned long sum_network (int degree, int ttl) 1.169 +{ 1.170 + int i; 1.171 + unsigned long sum; 1.172 + 1.173 + if (ttl <= 0) 1.174 + return 0; 1.175 + 1.176 + sum = degree; 1.177 + 1.178 + for (i = 2; i <= ttl; i++) 1.179 + sum += int_pow (degree-1, i-1) * degree; 1.180 + 1.181 + return sum; 1.182 +} 1.183 + 1.184 +/* 1.185 + * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL' 1.186 + * handshake headers to use for degree and ttl in the calculations. 1.187 + */ 1.188 +static GtNode *count_edges (TCPC *c, GtNode *node, void *udata) 1.189 +{ 1.190 + unsigned long *count = udata; 1.191 + unsigned long degree = 0; 1.192 + unsigned long ttl = 0; 1.193 + char *max_ttl_str; 1.194 + char *x_degree_str; 1.195 + 1.196 + if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL) 1.197 + ttl = ATOUL (max_ttl_str); 1.198 + 1.199 + if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL) 1.200 + degree = ATOUL (x_degree_str); 1.201 + 1.202 + if (degree == 0 || degree > 200) 1.203 + degree = default_degree; 1.204 + 1.205 + if (ttl == 0 || ttl > 30) 1.206 + ttl = default_ttl; 1.207 + 1.208 + /* workaround limewire misfeature where it appears X-Max-TTL is set 1.209 + * too high */ 1.210 + if (degree > 30 && ttl > 5) 1.211 + ttl = default_ttl; 1.212 + 1.213 + *count += sum_network (degree, ttl); 1.214 + return NULL; 1.215 +} 1.216 + 1.217 +/* 1.218 + * Of course this is totally inaccurate, and not even dynamic. It 1.219 + * approximates the maximum number of users in a network of 1.220 + * Gnutella's structure in a given outdegree and TTL range 1.221 + * (default_degree, default_ttl). This doesn't represent the whole 1.222 + * network, and since the network has a significant amount of 1.223 + * cycles this calculation is likely way too much. 1.224 + * 1.225 + * To compensate, we divide by 3. We could make a better guess 1.226 + * about the number of nodes if we could find the number of cycles 1.227 + * some way, but this information is not readily available. 1.228 + */ 1.229 +static unsigned long guess_users (void) 1.230 +{ 1.231 + unsigned long users = 0; 1.232 + 1.233 + gt_conn_foreach (count_edges, &users, 1.234 + GT_NODE_ULTRA, GT_NODE_CONNECTED, 0); 1.235 + 1.236 + /* multiply by the number of leaves */ 1.237 + users *= avg_leaves; 1.238 + 1.239 + /* divide by average redundancy level. TODO: calculate this dynamically */ 1.240 + users /= ((3 + 1) / 2); 1.241 + 1.242 + /* divide by 3 to account for cycles */ 1.243 + users /= 3; 1.244 + 1.245 + /* multiply by 2 because, well...this whole thing is misleading anyway, 1.246 + * and the total number of users is greater than the horizon */ 1.247 + users *= 2; 1.248 + 1.249 + return users; 1.250 +} 1.251 + 1.252 +/*****************************************************************************/ 1.253 + 1.254 +/* 1.255 + * TODO: accumulate statistics on average leaves/cycles on our immediate 1.256 + * peers. 1.257 + */ 1.258 +int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files, 1.259 + double *size, Dataset **extra) 1.260 +{ 1.261 + struct gt_stats pong_stats; 1.262 + struct gt_stats conn_stats; 1.263 + struct gt_stats avg_stats; 1.264 + size_t connected; 1.265 + 1.266 + *files = *users = *size = 0; 1.267 + 1.268 + connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED); 1.269 + 1.270 + if (connected == 0) 1.271 + return 0; 1.272 + 1.273 + get_pong_stats (&pong_stats); 1.274 + get_conn_stats (&conn_stats); 1.275 + 1.276 + if (conn_stats.users == 0) 1.277 + conn_stats.users = 1; 1.278 + 1.279 + if (pong_stats.users == 0) 1.280 + pong_stats.users = 1; 1.281 + 1.282 + /* divide the total files size by two since it is inflated by ultrapeers 1.283 + * abusing it to communicate their ultrapeer-ness to other nodes */ 1.284 + pong_stats.size_kb /= 2; 1.285 + conn_stats.size_kb /= 2; 1.286 + 1.287 + /* find the average of the data for our two sources */ 1.288 + pong_stats.size_kb /= pong_stats.users; 1.289 + pong_stats.files /= pong_stats.users; 1.290 + conn_stats.size_kb /= conn_stats.users; 1.291 + conn_stats.files /= conn_stats.users; 1.292 + 1.293 + /* put the stats of previously connected nodes on equal footing with the 1.294 + * stats collected from pongs -- they should be more reliable */ 1.295 + avg_stats.files = (pong_stats.files + conn_stats.files) / 2; 1.296 + avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2; 1.297 + 1.298 + /* add conn_stats.users for a little variation (not much) */ 1.299 + avg_stats.users = guess_users () + conn_stats.users; 1.300 + 1.301 + *users = avg_stats.users; 1.302 + *files = avg_stats.files * avg_stats.users; 1.303 + *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024; 1.304 + 1.305 + return connected; 1.306 +}