rev |
line source |
paulo@0
|
1 /*
|
paulo@0
|
2 * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $
|
paulo@0
|
3 *
|
paulo@0
|
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
|
paulo@0
|
5 *
|
paulo@0
|
6 * This program is free software; you can redistribute it and/or modify it
|
paulo@0
|
7 * under the terms of the GNU General Public License as published by the
|
paulo@0
|
8 * Free Software Foundation; either version 2, or (at your option) any
|
paulo@0
|
9 * later version.
|
paulo@0
|
10 *
|
paulo@0
|
11 * This program is distributed in the hope that it will be useful, but
|
paulo@0
|
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
|
paulo@0
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
paulo@0
|
14 * General Public License for more details.
|
paulo@0
|
15 */
|
paulo@0
|
16
|
paulo@0
|
17 #include "gt_gnutella.h"
|
paulo@0
|
18
|
paulo@0
|
19 #include "gt_node.h"
|
paulo@0
|
20 #include "gt_node_list.h"
|
paulo@0
|
21
|
paulo@0
|
22 #include "gt_search.h"
|
paulo@0
|
23
|
paulo@0
|
24 /*****************************************************************************/
|
paulo@0
|
25
|
paulo@0
|
26 struct gt_stats
|
paulo@0
|
27 {
|
paulo@0
|
28 double size_kb;
|
paulo@0
|
29 unsigned long files;
|
paulo@0
|
30 size_t users;
|
paulo@0
|
31 };
|
paulo@0
|
32
|
paulo@0
|
33 /* number of stats samples to accumulate */
|
paulo@0
|
34 #define NR_SAMPLES 64
|
paulo@0
|
35
|
paulo@0
|
36 /* number of samples to take surrounding the median */
|
paulo@0
|
37 #define NR_MEDIAN_SAMPLES 2
|
paulo@0
|
38
|
paulo@0
|
39 /*****************************************************************************/
|
paulo@0
|
40
|
paulo@0
|
41 /*
|
paulo@0
|
42 * This should be computed from our neighboring peers, but for now it
|
paulo@0
|
43 * hardcodes the max leaves for the most numerous nodes on the network,
|
paulo@0
|
44 * Limewire (32) and BearShare (10)
|
paulo@0
|
45 */
|
paulo@0
|
46 static int avg_leaves = (32 + 10) / 2;
|
paulo@0
|
47
|
paulo@0
|
48 /* default ttl we use in searches */
|
paulo@0
|
49 static int default_ttl = GT_SEARCH_TTL;
|
paulo@0
|
50
|
paulo@0
|
51 /* default outdegree for most nodes */
|
paulo@0
|
52 static int default_degree = 6;
|
paulo@0
|
53
|
paulo@0
|
54 /* keep track of the stats for the last NR_SAMPLES pongs */
|
paulo@0
|
55 static struct gt_stats samples[NR_SAMPLES];
|
paulo@0
|
56 static size_t samples_index;
|
paulo@0
|
57 static size_t samples_count;
|
paulo@0
|
58
|
paulo@0
|
59 /*****************************************************************************/
|
paulo@0
|
60
|
paulo@0
|
61 void gt_stats_accumulate (in_addr_t ipv4, in_port_t port,
|
paulo@0
|
62 in_addr_t src_ip, uint32_t files,
|
paulo@0
|
63 uint32_t size_kb)
|
paulo@0
|
64 {
|
paulo@0
|
65 samples[samples_index].files = files;
|
paulo@0
|
66 samples[samples_index].size_kb = size_kb;
|
paulo@0
|
67
|
paulo@0
|
68 samples_index = (samples_index + 1) % NR_SAMPLES;
|
paulo@0
|
69 samples_count++;
|
paulo@0
|
70
|
paulo@0
|
71 if (samples_count > NR_SAMPLES)
|
paulo@0
|
72 samples_count = NR_SAMPLES;
|
paulo@0
|
73 }
|
paulo@0
|
74
|
paulo@0
|
75 static int stats_cmp (const void *a, const void *b)
|
paulo@0
|
76 {
|
paulo@0
|
77 const struct gt_stats *a_s = a;
|
paulo@0
|
78 const struct gt_stats *b_s = b;
|
paulo@0
|
79
|
paulo@0
|
80 return a_s->size_kb - b_s->size_kb;
|
paulo@0
|
81 }
|
paulo@0
|
82
|
paulo@0
|
83 static void clear_stats (struct gt_stats *stats)
|
paulo@0
|
84 {
|
paulo@0
|
85 stats->size_kb = 0.0;
|
paulo@0
|
86 stats->files = 0;
|
paulo@0
|
87 stats->users = 0;
|
paulo@0
|
88 }
|
paulo@0
|
89
|
paulo@0
|
90 static void get_median_stats (struct gt_stats *pong_stats, size_t nr)
|
paulo@0
|
91 {
|
paulo@0
|
92 int low, high;
|
paulo@0
|
93 int mid;
|
paulo@0
|
94 int i;
|
paulo@0
|
95
|
paulo@0
|
96 if (nr == 0)
|
paulo@0
|
97 return;
|
paulo@0
|
98
|
paulo@0
|
99 mid = nr / 2;
|
paulo@0
|
100
|
paulo@0
|
101 low = mid - NR_MEDIAN_SAMPLES;
|
paulo@0
|
102 high = mid + NR_MEDIAN_SAMPLES;
|
paulo@0
|
103
|
paulo@0
|
104 if (low < 0)
|
paulo@0
|
105 low = 0;
|
paulo@0
|
106
|
paulo@0
|
107 if (high > nr - 1)
|
paulo@0
|
108 high = nr - 1;
|
paulo@0
|
109
|
paulo@0
|
110 for (i = low; i <= high; i++)
|
paulo@0
|
111 {
|
paulo@0
|
112 pong_stats->size_kb += samples[i].size_kb;
|
paulo@0
|
113 pong_stats->files += samples[i].files;
|
paulo@0
|
114 pong_stats->users++;
|
paulo@0
|
115 }
|
paulo@0
|
116 }
|
paulo@0
|
117
|
paulo@0
|
118 static void get_pong_stats (struct gt_stats *pong_stats)
|
paulo@0
|
119 {
|
paulo@0
|
120 qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp);
|
paulo@0
|
121
|
paulo@0
|
122 clear_stats (pong_stats);
|
paulo@0
|
123 get_median_stats (pong_stats, samples_count);
|
paulo@0
|
124 }
|
paulo@0
|
125
|
paulo@0
|
126 /*****************************************************************************/
|
paulo@0
|
127
|
paulo@0
|
128 static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st)
|
paulo@0
|
129 {
|
paulo@0
|
130 /* sanity check */
|
paulo@0
|
131 if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1)
|
paulo@0
|
132 return NULL;
|
paulo@0
|
133
|
paulo@0
|
134 st->size_kb += node->size_kb;
|
paulo@0
|
135 st->files += node->files;
|
paulo@0
|
136
|
paulo@0
|
137 if (node->vitality > 0)
|
paulo@0
|
138 st->users++;
|
paulo@0
|
139
|
paulo@0
|
140 return NULL;
|
paulo@0
|
141 }
|
paulo@0
|
142
|
paulo@0
|
143 static void get_conn_stats (struct gt_stats *conn_stats)
|
paulo@0
|
144 {
|
paulo@0
|
145 clear_stats (conn_stats);
|
paulo@0
|
146
|
paulo@0
|
147 /* grab statistics from the nodes structures */
|
paulo@0
|
148 gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats,
|
paulo@0
|
149 GT_NODE_NONE, GT_NODE_ANY, 0);
|
paulo@0
|
150 }
|
paulo@0
|
151
|
paulo@0
|
152 /*****************************************************************************/
|
paulo@0
|
153
|
paulo@0
|
154 /* no need to pull in math.h for this simple thing: exponent is small */
|
paulo@0
|
155 static unsigned long int_pow (int base, int exponent)
|
paulo@0
|
156 {
|
paulo@0
|
157 unsigned long total = 1;
|
paulo@0
|
158
|
paulo@0
|
159 while (exponent-- > 0)
|
paulo@0
|
160 total *= base;
|
paulo@0
|
161
|
paulo@0
|
162 return total;
|
paulo@0
|
163 }
|
paulo@0
|
164
|
paulo@0
|
165 static unsigned long sum_network (int degree, int ttl)
|
paulo@0
|
166 {
|
paulo@0
|
167 int i;
|
paulo@0
|
168 unsigned long sum;
|
paulo@0
|
169
|
paulo@0
|
170 if (ttl <= 0)
|
paulo@0
|
171 return 0;
|
paulo@0
|
172
|
paulo@0
|
173 sum = degree;
|
paulo@0
|
174
|
paulo@0
|
175 for (i = 2; i <= ttl; i++)
|
paulo@0
|
176 sum += int_pow (degree-1, i-1) * degree;
|
paulo@0
|
177
|
paulo@0
|
178 return sum;
|
paulo@0
|
179 }
|
paulo@0
|
180
|
paulo@0
|
181 /*
|
paulo@0
|
182 * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL'
|
paulo@0
|
183 * handshake headers to use for degree and ttl in the calculations.
|
paulo@0
|
184 */
|
paulo@0
|
185 static GtNode *count_edges (TCPC *c, GtNode *node, void *udata)
|
paulo@0
|
186 {
|
paulo@0
|
187 unsigned long *count = udata;
|
paulo@0
|
188 unsigned long degree = 0;
|
paulo@0
|
189 unsigned long ttl = 0;
|
paulo@0
|
190 char *max_ttl_str;
|
paulo@0
|
191 char *x_degree_str;
|
paulo@0
|
192
|
paulo@0
|
193 if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL)
|
paulo@0
|
194 ttl = ATOUL (max_ttl_str);
|
paulo@0
|
195
|
paulo@0
|
196 if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL)
|
paulo@0
|
197 degree = ATOUL (x_degree_str);
|
paulo@0
|
198
|
paulo@0
|
199 if (degree == 0 || degree > 200)
|
paulo@0
|
200 degree = default_degree;
|
paulo@0
|
201
|
paulo@0
|
202 if (ttl == 0 || ttl > 30)
|
paulo@0
|
203 ttl = default_ttl;
|
paulo@0
|
204
|
paulo@0
|
205 /* workaround limewire misfeature where it appears X-Max-TTL is set
|
paulo@0
|
206 * too high */
|
paulo@0
|
207 if (degree > 30 && ttl > 5)
|
paulo@0
|
208 ttl = default_ttl;
|
paulo@0
|
209
|
paulo@0
|
210 *count += sum_network (degree, ttl);
|
paulo@0
|
211 return NULL;
|
paulo@0
|
212 }
|
paulo@0
|
213
|
paulo@0
|
214 /*
|
paulo@0
|
215 * Of course this is totally inaccurate, and not even dynamic. It
|
paulo@0
|
216 * approximates the maximum number of users in a network of
|
paulo@0
|
217 * Gnutella's structure in a given outdegree and TTL range
|
paulo@0
|
218 * (default_degree, default_ttl). This doesn't represent the whole
|
paulo@0
|
219 * network, and since the network has a significant amount of
|
paulo@0
|
220 * cycles this calculation is likely way too much.
|
paulo@0
|
221 *
|
paulo@0
|
222 * To compensate, we divide by 3. We could make a better guess
|
paulo@0
|
223 * about the number of nodes if we could find the number of cycles
|
paulo@0
|
224 * some way, but this information is not readily available.
|
paulo@0
|
225 */
|
paulo@0
|
226 static unsigned long guess_users (void)
|
paulo@0
|
227 {
|
paulo@0
|
228 unsigned long users = 0;
|
paulo@0
|
229
|
paulo@0
|
230 gt_conn_foreach (count_edges, &users,
|
paulo@0
|
231 GT_NODE_ULTRA, GT_NODE_CONNECTED, 0);
|
paulo@0
|
232
|
paulo@0
|
233 /* multiply by the number of leaves */
|
paulo@0
|
234 users *= avg_leaves;
|
paulo@0
|
235
|
paulo@0
|
236 /* divide by average redundancy level. TODO: calculate this dynamically */
|
paulo@0
|
237 users /= ((3 + 1) / 2);
|
paulo@0
|
238
|
paulo@0
|
239 /* divide by 3 to account for cycles */
|
paulo@0
|
240 users /= 3;
|
paulo@0
|
241
|
paulo@0
|
242 /* multiply by 2 because, well...this whole thing is misleading anyway,
|
paulo@0
|
243 * and the total number of users is greater than the horizon */
|
paulo@0
|
244 users *= 2;
|
paulo@0
|
245
|
paulo@0
|
246 return users;
|
paulo@0
|
247 }
|
paulo@0
|
248
|
paulo@0
|
249 /*****************************************************************************/
|
paulo@0
|
250
|
paulo@0
|
251 /*
|
paulo@0
|
252 * TODO: accumulate statistics on average leaves/cycles on our immediate
|
paulo@0
|
253 * peers.
|
paulo@0
|
254 */
|
paulo@0
|
255 int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files,
|
paulo@0
|
256 double *size, Dataset **extra)
|
paulo@0
|
257 {
|
paulo@0
|
258 struct gt_stats pong_stats;
|
paulo@0
|
259 struct gt_stats conn_stats;
|
paulo@0
|
260 struct gt_stats avg_stats;
|
paulo@0
|
261 size_t connected;
|
paulo@0
|
262
|
paulo@0
|
263 *files = *users = *size = 0;
|
paulo@0
|
264
|
paulo@0
|
265 connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED);
|
paulo@0
|
266
|
paulo@0
|
267 if (connected == 0)
|
paulo@0
|
268 return 0;
|
paulo@0
|
269
|
paulo@0
|
270 get_pong_stats (&pong_stats);
|
paulo@0
|
271 get_conn_stats (&conn_stats);
|
paulo@0
|
272
|
paulo@0
|
273 if (conn_stats.users == 0)
|
paulo@0
|
274 conn_stats.users = 1;
|
paulo@0
|
275
|
paulo@0
|
276 if (pong_stats.users == 0)
|
paulo@0
|
277 pong_stats.users = 1;
|
paulo@0
|
278
|
paulo@0
|
279 /* divide the total files size by two since it is inflated by ultrapeers
|
paulo@0
|
280 * abusing it to communicate their ultrapeer-ness to other nodes */
|
paulo@0
|
281 pong_stats.size_kb /= 2;
|
paulo@0
|
282 conn_stats.size_kb /= 2;
|
paulo@0
|
283
|
paulo@0
|
284 /* find the average of the data for our two sources */
|
paulo@0
|
285 pong_stats.size_kb /= pong_stats.users;
|
paulo@0
|
286 pong_stats.files /= pong_stats.users;
|
paulo@0
|
287 conn_stats.size_kb /= conn_stats.users;
|
paulo@0
|
288 conn_stats.files /= conn_stats.users;
|
paulo@0
|
289
|
paulo@0
|
290 /* put the stats of previously connected nodes on equal footing with the
|
paulo@0
|
291 * stats collected from pongs -- they should be more reliable */
|
paulo@0
|
292 avg_stats.files = (pong_stats.files + conn_stats.files) / 2;
|
paulo@0
|
293 avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2;
|
paulo@0
|
294
|
paulo@0
|
295 /* add conn_stats.users for a little variation (not much) */
|
paulo@0
|
296 avg_stats.users = guess_users () + conn_stats.users;
|
paulo@0
|
297
|
paulo@0
|
298 *users = avg_stats.users;
|
paulo@0
|
299 *files = avg_stats.files * avg_stats.users;
|
paulo@0
|
300 *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024;
|
paulo@0
|
301
|
paulo@0
|
302 return connected;
|
paulo@0
|
303 }
|