comparison src/gt_stats.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:928f8df9dd82
1 /*
2 * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16
17 #include "gt_gnutella.h"
18
19 #include "gt_node.h"
20 #include "gt_node_list.h"
21
22 #include "gt_search.h"
23
24 /*****************************************************************************/
25
26 struct gt_stats
27 {
28 double size_kb;
29 unsigned long files;
30 size_t users;
31 };
32
33 /* number of stats samples to accumulate */
34 #define NR_SAMPLES 64
35
36 /* number of samples to take surrounding the median */
37 #define NR_MEDIAN_SAMPLES 2
38
39 /*****************************************************************************/
40
41 /*
42 * This should be computed from our neighboring peers, but for now it
43 * hardcodes the max leaves for the most numerous nodes on the network,
44 * Limewire (32) and BearShare (10)
45 */
46 static int avg_leaves = (32 + 10) / 2;
47
48 /* default ttl we use in searches */
49 static int default_ttl = GT_SEARCH_TTL;
50
51 /* default outdegree for most nodes */
52 static int default_degree = 6;
53
54 /* keep track of the stats for the last NR_SAMPLES pongs */
55 static struct gt_stats samples[NR_SAMPLES];
56 static size_t samples_index;
57 static size_t samples_count;
58
59 /*****************************************************************************/
60
61 void gt_stats_accumulate (in_addr_t ipv4, in_port_t port,
62 in_addr_t src_ip, uint32_t files,
63 uint32_t size_kb)
64 {
65 samples[samples_index].files = files;
66 samples[samples_index].size_kb = size_kb;
67
68 samples_index = (samples_index + 1) % NR_SAMPLES;
69 samples_count++;
70
71 if (samples_count > NR_SAMPLES)
72 samples_count = NR_SAMPLES;
73 }
74
75 static int stats_cmp (const void *a, const void *b)
76 {
77 const struct gt_stats *a_s = a;
78 const struct gt_stats *b_s = b;
79
80 return a_s->size_kb - b_s->size_kb;
81 }
82
83 static void clear_stats (struct gt_stats *stats)
84 {
85 stats->size_kb = 0.0;
86 stats->files = 0;
87 stats->users = 0;
88 }
89
90 static void get_median_stats (struct gt_stats *pong_stats, size_t nr)
91 {
92 int low, high;
93 int mid;
94 int i;
95
96 if (nr == 0)
97 return;
98
99 mid = nr / 2;
100
101 low = mid - NR_MEDIAN_SAMPLES;
102 high = mid + NR_MEDIAN_SAMPLES;
103
104 if (low < 0)
105 low = 0;
106
107 if (high > nr - 1)
108 high = nr - 1;
109
110 for (i = low; i <= high; i++)
111 {
112 pong_stats->size_kb += samples[i].size_kb;
113 pong_stats->files += samples[i].files;
114 pong_stats->users++;
115 }
116 }
117
118 static void get_pong_stats (struct gt_stats *pong_stats)
119 {
120 qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp);
121
122 clear_stats (pong_stats);
123 get_median_stats (pong_stats, samples_count);
124 }
125
126 /*****************************************************************************/
127
128 static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st)
129 {
130 /* sanity check */
131 if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1)
132 return NULL;
133
134 st->size_kb += node->size_kb;
135 st->files += node->files;
136
137 if (node->vitality > 0)
138 st->users++;
139
140 return NULL;
141 }
142
143 static void get_conn_stats (struct gt_stats *conn_stats)
144 {
145 clear_stats (conn_stats);
146
147 /* grab statistics from the nodes structures */
148 gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats,
149 GT_NODE_NONE, GT_NODE_ANY, 0);
150 }
151
152 /*****************************************************************************/
153
154 /* no need to pull in math.h for this simple thing: exponent is small */
155 static unsigned long int_pow (int base, int exponent)
156 {
157 unsigned long total = 1;
158
159 while (exponent-- > 0)
160 total *= base;
161
162 return total;
163 }
164
165 static unsigned long sum_network (int degree, int ttl)
166 {
167 int i;
168 unsigned long sum;
169
170 if (ttl <= 0)
171 return 0;
172
173 sum = degree;
174
175 for (i = 2; i <= ttl; i++)
176 sum += int_pow (degree-1, i-1) * degree;
177
178 return sum;
179 }
180
181 /*
182 * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL'
183 * handshake headers to use for degree and ttl in the calculations.
184 */
185 static GtNode *count_edges (TCPC *c, GtNode *node, void *udata)
186 {
187 unsigned long *count = udata;
188 unsigned long degree = 0;
189 unsigned long ttl = 0;
190 char *max_ttl_str;
191 char *x_degree_str;
192
193 if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL)
194 ttl = ATOUL (max_ttl_str);
195
196 if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL)
197 degree = ATOUL (x_degree_str);
198
199 if (degree == 0 || degree > 200)
200 degree = default_degree;
201
202 if (ttl == 0 || ttl > 30)
203 ttl = default_ttl;
204
205 /* workaround limewire misfeature where it appears X-Max-TTL is set
206 * too high */
207 if (degree > 30 && ttl > 5)
208 ttl = default_ttl;
209
210 *count += sum_network (degree, ttl);
211 return NULL;
212 }
213
214 /*
215 * Of course this is totally inaccurate, and not even dynamic. It
216 * approximates the maximum number of users in a network of
217 * Gnutella's structure in a given outdegree and TTL range
218 * (default_degree, default_ttl). This doesn't represent the whole
219 * network, and since the network has a significant amount of
220 * cycles this calculation is likely way too much.
221 *
222 * To compensate, we divide by 3. We could make a better guess
223 * about the number of nodes if we could find the number of cycles
224 * some way, but this information is not readily available.
225 */
226 static unsigned long guess_users (void)
227 {
228 unsigned long users = 0;
229
230 gt_conn_foreach (count_edges, &users,
231 GT_NODE_ULTRA, GT_NODE_CONNECTED, 0);
232
233 /* multiply by the number of leaves */
234 users *= avg_leaves;
235
236 /* divide by average redundancy level. TODO: calculate this dynamically */
237 users /= ((3 + 1) / 2);
238
239 /* divide by 3 to account for cycles */
240 users /= 3;
241
242 /* multiply by 2 because, well...this whole thing is misleading anyway,
243 * and the total number of users is greater than the horizon */
244 users *= 2;
245
246 return users;
247 }
248
249 /*****************************************************************************/
250
251 /*
252 * TODO: accumulate statistics on average leaves/cycles on our immediate
253 * peers.
254 */
255 int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files,
256 double *size, Dataset **extra)
257 {
258 struct gt_stats pong_stats;
259 struct gt_stats conn_stats;
260 struct gt_stats avg_stats;
261 size_t connected;
262
263 *files = *users = *size = 0;
264
265 connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED);
266
267 if (connected == 0)
268 return 0;
269
270 get_pong_stats (&pong_stats);
271 get_conn_stats (&conn_stats);
272
273 if (conn_stats.users == 0)
274 conn_stats.users = 1;
275
276 if (pong_stats.users == 0)
277 pong_stats.users = 1;
278
279 /* divide the total files size by two since it is inflated by ultrapeers
280 * abusing it to communicate their ultrapeer-ness to other nodes */
281 pong_stats.size_kb /= 2;
282 conn_stats.size_kb /= 2;
283
284 /* find the average of the data for our two sources */
285 pong_stats.size_kb /= pong_stats.users;
286 pong_stats.files /= pong_stats.users;
287 conn_stats.size_kb /= conn_stats.users;
288 conn_stats.files /= conn_stats.users;
289
290 /* put the stats of previously connected nodes on equal footing with the
291 * stats collected from pongs -- they should be more reliable */
292 avg_stats.files = (pong_stats.files + conn_stats.files) / 2;
293 avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2;
294
295 /* add conn_stats.users for a little variation (not much) */
296 avg_stats.users = guess_users () + conn_stats.users;
297
298 *users = avg_stats.users;
299 *files = avg_stats.files * avg_stats.users;
300 *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024;
301
302 return connected;
303 }