annotate src/gt_stats.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
rev   line source
paulo@0 1 /*
paulo@0 2 * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $
paulo@0 3 *
paulo@0 4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
paulo@0 5 *
paulo@0 6 * This program is free software; you can redistribute it and/or modify it
paulo@0 7 * under the terms of the GNU General Public License as published by the
paulo@0 8 * Free Software Foundation; either version 2, or (at your option) any
paulo@0 9 * later version.
paulo@0 10 *
paulo@0 11 * This program is distributed in the hope that it will be useful, but
paulo@0 12 * WITHOUT ANY WARRANTY; without even the implied warranty of
paulo@0 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
paulo@0 14 * General Public License for more details.
paulo@0 15 */
paulo@0 16
paulo@0 17 #include "gt_gnutella.h"
paulo@0 18
paulo@0 19 #include "gt_node.h"
paulo@0 20 #include "gt_node_list.h"
paulo@0 21
paulo@0 22 #include "gt_search.h"
paulo@0 23
paulo@0 24 /*****************************************************************************/
paulo@0 25
paulo@0 26 struct gt_stats
paulo@0 27 {
paulo@0 28 double size_kb;
paulo@0 29 unsigned long files;
paulo@0 30 size_t users;
paulo@0 31 };
paulo@0 32
paulo@0 33 /* number of stats samples to accumulate */
paulo@0 34 #define NR_SAMPLES 64
paulo@0 35
paulo@0 36 /* number of samples to take surrounding the median */
paulo@0 37 #define NR_MEDIAN_SAMPLES 2
paulo@0 38
paulo@0 39 /*****************************************************************************/
paulo@0 40
paulo@0 41 /*
paulo@0 42 * This should be computed from our neighboring peers, but for now it
paulo@0 43 * hardcodes the max leaves for the most numerous nodes on the network,
paulo@0 44 * Limewire (32) and BearShare (10)
paulo@0 45 */
paulo@0 46 static int avg_leaves = (32 + 10) / 2;
paulo@0 47
paulo@0 48 /* default ttl we use in searches */
paulo@0 49 static int default_ttl = GT_SEARCH_TTL;
paulo@0 50
paulo@0 51 /* default outdegree for most nodes */
paulo@0 52 static int default_degree = 6;
paulo@0 53
paulo@0 54 /* keep track of the stats for the last NR_SAMPLES pongs */
paulo@0 55 static struct gt_stats samples[NR_SAMPLES];
paulo@0 56 static size_t samples_index;
paulo@0 57 static size_t samples_count;
paulo@0 58
paulo@0 59 /*****************************************************************************/
paulo@0 60
paulo@0 61 void gt_stats_accumulate (in_addr_t ipv4, in_port_t port,
paulo@0 62 in_addr_t src_ip, uint32_t files,
paulo@0 63 uint32_t size_kb)
paulo@0 64 {
paulo@0 65 samples[samples_index].files = files;
paulo@0 66 samples[samples_index].size_kb = size_kb;
paulo@0 67
paulo@0 68 samples_index = (samples_index + 1) % NR_SAMPLES;
paulo@0 69 samples_count++;
paulo@0 70
paulo@0 71 if (samples_count > NR_SAMPLES)
paulo@0 72 samples_count = NR_SAMPLES;
paulo@0 73 }
paulo@0 74
paulo@0 75 static int stats_cmp (const void *a, const void *b)
paulo@0 76 {
paulo@0 77 const struct gt_stats *a_s = a;
paulo@0 78 const struct gt_stats *b_s = b;
paulo@0 79
paulo@0 80 return a_s->size_kb - b_s->size_kb;
paulo@0 81 }
paulo@0 82
paulo@0 83 static void clear_stats (struct gt_stats *stats)
paulo@0 84 {
paulo@0 85 stats->size_kb = 0.0;
paulo@0 86 stats->files = 0;
paulo@0 87 stats->users = 0;
paulo@0 88 }
paulo@0 89
paulo@0 90 static void get_median_stats (struct gt_stats *pong_stats, size_t nr)
paulo@0 91 {
paulo@0 92 int low, high;
paulo@0 93 int mid;
paulo@0 94 int i;
paulo@0 95
paulo@0 96 if (nr == 0)
paulo@0 97 return;
paulo@0 98
paulo@0 99 mid = nr / 2;
paulo@0 100
paulo@0 101 low = mid - NR_MEDIAN_SAMPLES;
paulo@0 102 high = mid + NR_MEDIAN_SAMPLES;
paulo@0 103
paulo@0 104 if (low < 0)
paulo@0 105 low = 0;
paulo@0 106
paulo@0 107 if (high > nr - 1)
paulo@0 108 high = nr - 1;
paulo@0 109
paulo@0 110 for (i = low; i <= high; i++)
paulo@0 111 {
paulo@0 112 pong_stats->size_kb += samples[i].size_kb;
paulo@0 113 pong_stats->files += samples[i].files;
paulo@0 114 pong_stats->users++;
paulo@0 115 }
paulo@0 116 }
paulo@0 117
paulo@0 118 static void get_pong_stats (struct gt_stats *pong_stats)
paulo@0 119 {
paulo@0 120 qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp);
paulo@0 121
paulo@0 122 clear_stats (pong_stats);
paulo@0 123 get_median_stats (pong_stats, samples_count);
paulo@0 124 }
paulo@0 125
paulo@0 126 /*****************************************************************************/
paulo@0 127
paulo@0 128 static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st)
paulo@0 129 {
paulo@0 130 /* sanity check */
paulo@0 131 if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1)
paulo@0 132 return NULL;
paulo@0 133
paulo@0 134 st->size_kb += node->size_kb;
paulo@0 135 st->files += node->files;
paulo@0 136
paulo@0 137 if (node->vitality > 0)
paulo@0 138 st->users++;
paulo@0 139
paulo@0 140 return NULL;
paulo@0 141 }
paulo@0 142
paulo@0 143 static void get_conn_stats (struct gt_stats *conn_stats)
paulo@0 144 {
paulo@0 145 clear_stats (conn_stats);
paulo@0 146
paulo@0 147 /* grab statistics from the nodes structures */
paulo@0 148 gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats,
paulo@0 149 GT_NODE_NONE, GT_NODE_ANY, 0);
paulo@0 150 }
paulo@0 151
paulo@0 152 /*****************************************************************************/
paulo@0 153
paulo@0 154 /* no need to pull in math.h for this simple thing: exponent is small */
paulo@0 155 static unsigned long int_pow (int base, int exponent)
paulo@0 156 {
paulo@0 157 unsigned long total = 1;
paulo@0 158
paulo@0 159 while (exponent-- > 0)
paulo@0 160 total *= base;
paulo@0 161
paulo@0 162 return total;
paulo@0 163 }
paulo@0 164
paulo@0 165 static unsigned long sum_network (int degree, int ttl)
paulo@0 166 {
paulo@0 167 int i;
paulo@0 168 unsigned long sum;
paulo@0 169
paulo@0 170 if (ttl <= 0)
paulo@0 171 return 0;
paulo@0 172
paulo@0 173 sum = degree;
paulo@0 174
paulo@0 175 for (i = 2; i <= ttl; i++)
paulo@0 176 sum += int_pow (degree-1, i-1) * degree;
paulo@0 177
paulo@0 178 return sum;
paulo@0 179 }
paulo@0 180
paulo@0 181 /*
paulo@0 182 * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL'
paulo@0 183 * handshake headers to use for degree and ttl in the calculations.
paulo@0 184 */
paulo@0 185 static GtNode *count_edges (TCPC *c, GtNode *node, void *udata)
paulo@0 186 {
paulo@0 187 unsigned long *count = udata;
paulo@0 188 unsigned long degree = 0;
paulo@0 189 unsigned long ttl = 0;
paulo@0 190 char *max_ttl_str;
paulo@0 191 char *x_degree_str;
paulo@0 192
paulo@0 193 if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL)
paulo@0 194 ttl = ATOUL (max_ttl_str);
paulo@0 195
paulo@0 196 if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL)
paulo@0 197 degree = ATOUL (x_degree_str);
paulo@0 198
paulo@0 199 if (degree == 0 || degree > 200)
paulo@0 200 degree = default_degree;
paulo@0 201
paulo@0 202 if (ttl == 0 || ttl > 30)
paulo@0 203 ttl = default_ttl;
paulo@0 204
paulo@0 205 /* workaround limewire misfeature where it appears X-Max-TTL is set
paulo@0 206 * too high */
paulo@0 207 if (degree > 30 && ttl > 5)
paulo@0 208 ttl = default_ttl;
paulo@0 209
paulo@0 210 *count += sum_network (degree, ttl);
paulo@0 211 return NULL;
paulo@0 212 }
paulo@0 213
paulo@0 214 /*
paulo@0 215 * Of course this is totally inaccurate, and not even dynamic. It
paulo@0 216 * approximates the maximum number of users in a network of
paulo@0 217 * Gnutella's structure in a given outdegree and TTL range
paulo@0 218 * (default_degree, default_ttl). This doesn't represent the whole
paulo@0 219 * network, and since the network has a significant amount of
paulo@0 220 * cycles this calculation is likely way too much.
paulo@0 221 *
paulo@0 222 * To compensate, we divide by 3. We could make a better guess
paulo@0 223 * about the number of nodes if we could find the number of cycles
paulo@0 224 * some way, but this information is not readily available.
paulo@0 225 */
paulo@0 226 static unsigned long guess_users (void)
paulo@0 227 {
paulo@0 228 unsigned long users = 0;
paulo@0 229
paulo@0 230 gt_conn_foreach (count_edges, &users,
paulo@0 231 GT_NODE_ULTRA, GT_NODE_CONNECTED, 0);
paulo@0 232
paulo@0 233 /* multiply by the number of leaves */
paulo@0 234 users *= avg_leaves;
paulo@0 235
paulo@0 236 /* divide by average redundancy level. TODO: calculate this dynamically */
paulo@0 237 users /= ((3 + 1) / 2);
paulo@0 238
paulo@0 239 /* divide by 3 to account for cycles */
paulo@0 240 users /= 3;
paulo@0 241
paulo@0 242 /* multiply by 2 because, well...this whole thing is misleading anyway,
paulo@0 243 * and the total number of users is greater than the horizon */
paulo@0 244 users *= 2;
paulo@0 245
paulo@0 246 return users;
paulo@0 247 }
paulo@0 248
paulo@0 249 /*****************************************************************************/
paulo@0 250
paulo@0 251 /*
paulo@0 252 * TODO: accumulate statistics on average leaves/cycles on our immediate
paulo@0 253 * peers.
paulo@0 254 */
paulo@0 255 int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files,
paulo@0 256 double *size, Dataset **extra)
paulo@0 257 {
paulo@0 258 struct gt_stats pong_stats;
paulo@0 259 struct gt_stats conn_stats;
paulo@0 260 struct gt_stats avg_stats;
paulo@0 261 size_t connected;
paulo@0 262
paulo@0 263 *files = *users = *size = 0;
paulo@0 264
paulo@0 265 connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED);
paulo@0 266
paulo@0 267 if (connected == 0)
paulo@0 268 return 0;
paulo@0 269
paulo@0 270 get_pong_stats (&pong_stats);
paulo@0 271 get_conn_stats (&conn_stats);
paulo@0 272
paulo@0 273 if (conn_stats.users == 0)
paulo@0 274 conn_stats.users = 1;
paulo@0 275
paulo@0 276 if (pong_stats.users == 0)
paulo@0 277 pong_stats.users = 1;
paulo@0 278
paulo@0 279 /* divide the total files size by two since it is inflated by ultrapeers
paulo@0 280 * abusing it to communicate their ultrapeer-ness to other nodes */
paulo@0 281 pong_stats.size_kb /= 2;
paulo@0 282 conn_stats.size_kb /= 2;
paulo@0 283
paulo@0 284 /* find the average of the data for our two sources */
paulo@0 285 pong_stats.size_kb /= pong_stats.users;
paulo@0 286 pong_stats.files /= pong_stats.users;
paulo@0 287 conn_stats.size_kb /= conn_stats.users;
paulo@0 288 conn_stats.files /= conn_stats.users;
paulo@0 289
paulo@0 290 /* put the stats of previously connected nodes on equal footing with the
paulo@0 291 * stats collected from pongs -- they should be more reliable */
paulo@0 292 avg_stats.files = (pong_stats.files + conn_stats.files) / 2;
paulo@0 293 avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2;
paulo@0 294
paulo@0 295 /* add conn_stats.users for a little variation (not much) */
paulo@0 296 avg_stats.users = guess_users () + conn_stats.users;
paulo@0 297
paulo@0 298 *users = avg_stats.users;
paulo@0 299 *files = avg_stats.files * avg_stats.users;
paulo@0 300 *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024;
paulo@0 301
paulo@0 302 return connected;
paulo@0 303 }