Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
comparison src/gt_stats.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:928f8df9dd82 |
---|---|
1 /* | |
2 * $Id: gt_stats.c,v 1.21 2005/01/04 14:59:58 mkern Exp $ | |
3 * | |
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU General Public License as published by the | |
8 * Free Software Foundation; either version 2, or (at your option) any | |
9 * later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, but | |
12 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * General Public License for more details. | |
15 */ | |
16 | |
17 #include "gt_gnutella.h" | |
18 | |
19 #include "gt_node.h" | |
20 #include "gt_node_list.h" | |
21 | |
22 #include "gt_search.h" | |
23 | |
24 /*****************************************************************************/ | |
25 | |
26 struct gt_stats | |
27 { | |
28 double size_kb; | |
29 unsigned long files; | |
30 size_t users; | |
31 }; | |
32 | |
33 /* number of stats samples to accumulate */ | |
34 #define NR_SAMPLES 64 | |
35 | |
36 /* number of samples to take surrounding the median */ | |
37 #define NR_MEDIAN_SAMPLES 2 | |
38 | |
39 /*****************************************************************************/ | |
40 | |
41 /* | |
42 * This should be computed from our neighboring peers, but for now it | |
43 * hardcodes the max leaves for the most numerous nodes on the network, | |
44 * Limewire (32) and BearShare (10) | |
45 */ | |
46 static int avg_leaves = (32 + 10) / 2; | |
47 | |
48 /* default ttl we use in searches */ | |
49 static int default_ttl = GT_SEARCH_TTL; | |
50 | |
51 /* default outdegree for most nodes */ | |
52 static int default_degree = 6; | |
53 | |
54 /* keep track of the stats for the last NR_SAMPLES pongs */ | |
55 static struct gt_stats samples[NR_SAMPLES]; | |
56 static size_t samples_index; | |
57 static size_t samples_count; | |
58 | |
59 /*****************************************************************************/ | |
60 | |
61 void gt_stats_accumulate (in_addr_t ipv4, in_port_t port, | |
62 in_addr_t src_ip, uint32_t files, | |
63 uint32_t size_kb) | |
64 { | |
65 samples[samples_index].files = files; | |
66 samples[samples_index].size_kb = size_kb; | |
67 | |
68 samples_index = (samples_index + 1) % NR_SAMPLES; | |
69 samples_count++; | |
70 | |
71 if (samples_count > NR_SAMPLES) | |
72 samples_count = NR_SAMPLES; | |
73 } | |
74 | |
75 static int stats_cmp (const void *a, const void *b) | |
76 { | |
77 const struct gt_stats *a_s = a; | |
78 const struct gt_stats *b_s = b; | |
79 | |
80 return a_s->size_kb - b_s->size_kb; | |
81 } | |
82 | |
83 static void clear_stats (struct gt_stats *stats) | |
84 { | |
85 stats->size_kb = 0.0; | |
86 stats->files = 0; | |
87 stats->users = 0; | |
88 } | |
89 | |
90 static void get_median_stats (struct gt_stats *pong_stats, size_t nr) | |
91 { | |
92 int low, high; | |
93 int mid; | |
94 int i; | |
95 | |
96 if (nr == 0) | |
97 return; | |
98 | |
99 mid = nr / 2; | |
100 | |
101 low = mid - NR_MEDIAN_SAMPLES; | |
102 high = mid + NR_MEDIAN_SAMPLES; | |
103 | |
104 if (low < 0) | |
105 low = 0; | |
106 | |
107 if (high > nr - 1) | |
108 high = nr - 1; | |
109 | |
110 for (i = low; i <= high; i++) | |
111 { | |
112 pong_stats->size_kb += samples[i].size_kb; | |
113 pong_stats->files += samples[i].files; | |
114 pong_stats->users++; | |
115 } | |
116 } | |
117 | |
118 static void get_pong_stats (struct gt_stats *pong_stats) | |
119 { | |
120 qsort (samples, samples_count, sizeof (struct gt_stats), stats_cmp); | |
121 | |
122 clear_stats (pong_stats); | |
123 get_median_stats (pong_stats, samples_count); | |
124 } | |
125 | |
126 /*****************************************************************************/ | |
127 | |
128 static TCPC *count_stats (TCPC *c, GtNode *node, struct gt_stats *st) | |
129 { | |
130 /* sanity check */ | |
131 if (node->size_kb == (uint32_t)-1 || node->files == (uint32_t)-1) | |
132 return NULL; | |
133 | |
134 st->size_kb += node->size_kb; | |
135 st->files += node->files; | |
136 | |
137 if (node->vitality > 0) | |
138 st->users++; | |
139 | |
140 return NULL; | |
141 } | |
142 | |
143 static void get_conn_stats (struct gt_stats *conn_stats) | |
144 { | |
145 clear_stats (conn_stats); | |
146 | |
147 /* grab statistics from the nodes structures */ | |
148 gt_conn_foreach (GT_CONN_FOREACH(count_stats), conn_stats, | |
149 GT_NODE_NONE, GT_NODE_ANY, 0); | |
150 } | |
151 | |
152 /*****************************************************************************/ | |
153 | |
154 /* no need to pull in math.h for this simple thing: exponent is small */ | |
155 static unsigned long int_pow (int base, int exponent) | |
156 { | |
157 unsigned long total = 1; | |
158 | |
159 while (exponent-- > 0) | |
160 total *= base; | |
161 | |
162 return total; | |
163 } | |
164 | |
165 static unsigned long sum_network (int degree, int ttl) | |
166 { | |
167 int i; | |
168 unsigned long sum; | |
169 | |
170 if (ttl <= 0) | |
171 return 0; | |
172 | |
173 sum = degree; | |
174 | |
175 for (i = 2; i <= ttl; i++) | |
176 sum += int_pow (degree-1, i-1) * degree; | |
177 | |
178 return sum; | |
179 } | |
180 | |
181 /* | |
182 * For each node we're connected to, check the 'X-Degree' and 'X-Max-TTL' | |
183 * handshake headers to use for degree and ttl in the calculations. | |
184 */ | |
185 static GtNode *count_edges (TCPC *c, GtNode *node, void *udata) | |
186 { | |
187 unsigned long *count = udata; | |
188 unsigned long degree = 0; | |
189 unsigned long ttl = 0; | |
190 char *max_ttl_str; | |
191 char *x_degree_str; | |
192 | |
193 if ((max_ttl_str = dataset_lookupstr (node->hdr, "x-max-ttl")) != NULL) | |
194 ttl = ATOUL (max_ttl_str); | |
195 | |
196 if ((x_degree_str = dataset_lookupstr (node->hdr, "x-degree")) != NULL) | |
197 degree = ATOUL (x_degree_str); | |
198 | |
199 if (degree == 0 || degree > 200) | |
200 degree = default_degree; | |
201 | |
202 if (ttl == 0 || ttl > 30) | |
203 ttl = default_ttl; | |
204 | |
205 /* workaround limewire misfeature where it appears X-Max-TTL is set | |
206 * too high */ | |
207 if (degree > 30 && ttl > 5) | |
208 ttl = default_ttl; | |
209 | |
210 *count += sum_network (degree, ttl); | |
211 return NULL; | |
212 } | |
213 | |
214 /* | |
215 * Of course this is totally inaccurate, and not even dynamic. It | |
216 * approximates the maximum number of users in a network of | |
217 * Gnutella's structure in a given outdegree and TTL range | |
218 * (default_degree, default_ttl). This doesn't represent the whole | |
219 * network, and since the network has a significant amount of | |
220 * cycles this calculation is likely way too much. | |
221 * | |
222 * To compensate, we divide by 3. We could make a better guess | |
223 * about the number of nodes if we could find the number of cycles | |
224 * some way, but this information is not readily available. | |
225 */ | |
226 static unsigned long guess_users (void) | |
227 { | |
228 unsigned long users = 0; | |
229 | |
230 gt_conn_foreach (count_edges, &users, | |
231 GT_NODE_ULTRA, GT_NODE_CONNECTED, 0); | |
232 | |
233 /* multiply by the number of leaves */ | |
234 users *= avg_leaves; | |
235 | |
236 /* divide by average redundancy level. TODO: calculate this dynamically */ | |
237 users /= ((3 + 1) / 2); | |
238 | |
239 /* divide by 3 to account for cycles */ | |
240 users /= 3; | |
241 | |
242 /* multiply by 2 because, well...this whole thing is misleading anyway, | |
243 * and the total number of users is greater than the horizon */ | |
244 users *= 2; | |
245 | |
246 return users; | |
247 } | |
248 | |
249 /*****************************************************************************/ | |
250 | |
251 /* | |
252 * TODO: accumulate statistics on average leaves/cycles on our immediate | |
253 * peers. | |
254 */ | |
255 int gnutella_stats (Protocol *p, unsigned long *users, unsigned long *files, | |
256 double *size, Dataset **extra) | |
257 { | |
258 struct gt_stats pong_stats; | |
259 struct gt_stats conn_stats; | |
260 struct gt_stats avg_stats; | |
261 size_t connected; | |
262 | |
263 *files = *users = *size = 0; | |
264 | |
265 connected = gt_conn_length (GT_NODE_ULTRA, GT_NODE_CONNECTED); | |
266 | |
267 if (connected == 0) | |
268 return 0; | |
269 | |
270 get_pong_stats (&pong_stats); | |
271 get_conn_stats (&conn_stats); | |
272 | |
273 if (conn_stats.users == 0) | |
274 conn_stats.users = 1; | |
275 | |
276 if (pong_stats.users == 0) | |
277 pong_stats.users = 1; | |
278 | |
279 /* divide the total files size by two since it is inflated by ultrapeers | |
280 * abusing it to communicate their ultrapeer-ness to other nodes */ | |
281 pong_stats.size_kb /= 2; | |
282 conn_stats.size_kb /= 2; | |
283 | |
284 /* find the average of the data for our two sources */ | |
285 pong_stats.size_kb /= pong_stats.users; | |
286 pong_stats.files /= pong_stats.users; | |
287 conn_stats.size_kb /= conn_stats.users; | |
288 conn_stats.files /= conn_stats.users; | |
289 | |
290 /* put the stats of previously connected nodes on equal footing with the | |
291 * stats collected from pongs -- they should be more reliable */ | |
292 avg_stats.files = (pong_stats.files + conn_stats.files) / 2; | |
293 avg_stats.size_kb = (pong_stats.size_kb + conn_stats.size_kb) / 2; | |
294 | |
295 /* add conn_stats.users for a little variation (not much) */ | |
296 avg_stats.users = guess_users () + conn_stats.users; | |
297 | |
298 *users = avg_stats.users; | |
299 *files = avg_stats.files * avg_stats.users; | |
300 *size = avg_stats.size_kb * avg_stats.users / 1024 / 1024; | |
301 | |
302 return connected; | |
303 } |