GRASS Programmer's Manual  6.5.svn(2014)-r66266
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
class.c
Go to the documentation of this file.
1 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
2 
3 #include <grass/glocale.h>
4 #include <grass/arraystats.h>
5 
6 double class_apply_algorithm(char *algo, double *data, int nrec, int *nbreaks,
7  double *classbreaks)
8 {
9  double finfo = 0.0;
10 
11  if (G_strcasecmp(algo, "int") == 0)
12  finfo = class_interval(data, nrec, *nbreaks, classbreaks);
13  else if (G_strcasecmp(algo, "std") == 0)
14  finfo = class_stdev(data, nrec, *nbreaks, classbreaks);
15  else if (G_strcasecmp(algo, "qua") == 0)
16  finfo = class_quant(data, nrec, *nbreaks, classbreaks);
17  else if (G_strcasecmp(algo, "equ") == 0)
18  finfo = class_equiprob(data, nrec, nbreaks, classbreaks);
19  else if (G_strcasecmp(algo, "dis") == 0)
20  /* finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
21  G_fatal_error(_("Discont algorithm currently not available because of bugs"));
22  else
23  G_fatal_error(_("%s: Unknown algorithm"), algo);
24 
25  if (finfo == 0)
26  G_fatal_error(_("%s: Error in classification algorithm"), algo);
27 
28  return finfo;
29 }
30 
31 int class_interval(double *data, int count, int nbreaks, double *classbreaks)
32 {
33  double min, max;
34  double step;
35  int i = 0;
36 
37  min = data[0];
38  max = data[count - 1];
39 
40  step = (max - min) / (nbreaks + 1);
41 
42  for (i = 0; i < nbreaks; i++)
43  classbreaks[i] = min + (step * (i + 1));
44 
45  return (1);
46 }
47 
48 double class_stdev(double *data, int count, int nbreaks, double *classbreaks)
49 {
50  struct GASTATS stats;
51  int i;
52  int nbclass;
53  double scale = 1.0;
54 
55  basic_stats(data, count, &stats);
56 
57  nbclass = nbreaks + 1;
58 
59  if (nbclass % 2 == 1) { /* number of classes is uneven so we center middle class on mean */
60 
61  /* find appropriate fraction of stdev for step */
62  i = 1;
63  while (i) {
64  if (((stats.mean + stats.stdev * scale / 2) +
65  (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
66  ((stats.mean - stats.stdev * scale / 2) -
67  (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
68  scale = scale / 2;
69  else
70  i = 0;
71  }
72 
73  /* classbreaks below the mean */
74  for (i = 0; i < nbreaks / 2; i++)
75  classbreaks[i] =
76  (stats.mean - stats.stdev * scale / 2) -
77  stats.stdev * scale * (nbreaks / 2 - (i + 1));
78  /* classbreaks above the mean */
79  for (i = i; i < nbreaks; i++)
80  classbreaks[i] =
81  (stats.mean + stats.stdev * scale / 2) +
82  stats.stdev * scale * (i - nbreaks / 2);
83 
84  }
85  else { /* number of classes is even so mean is a classbreak */
86 
87  /* decide whether to use 1*stdev or 0.5*stdev as step */
88  i = 1;
89  while (i) {
90  if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
91  stats.max) ||
92  ((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
93  stats.min))
94  scale = scale / 2;
95  else
96  i = 0;
97  }
98 
99  /* classbreaks below the mean and on the mean */
100  for (i = 0; i <= nbreaks / 2; i++)
101  classbreaks[i] =
102  stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
103  /* classbreaks above the mean */
104  for (i = i; i < nbreaks; i++)
105  classbreaks[i] =
106  stats.mean + stats.stdev * scale * (i - nbreaks / 2);
107  }
108 
109  return (scale);
110 }
111 
112 int class_quant(double *data, int count, int nbreaks, double *classbreaks)
113 {
114  int i, step;
115 
116  step = count / (nbreaks + 1);
117 
118  for (i = 0; i < nbreaks; i++)
119  classbreaks[i] = data[step * (i + 1)];
120 
121  return (1);
122 }
123 
124 
125 int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
126 {
127  int i, j;
128  double *lequi; /*Vector of scale factors for probabilities of the normal distribution */
129  struct GASTATS stats;
130  int nbclass;
131 
132  nbclass = *nbreaks + 1;
133 
134  lequi = G_malloc(*nbreaks * sizeof(double));
135 
136  /*The following values come from the normal distribution and
137  * will be used as:
138  * classbreak[i] = (lequi[i] * stdev) + mean;
139  */
140 
141  if (nbclass < 3) {
142  lequi[0] = 0;
143  }
144  else if (nbclass == 3) {
145  lequi[0] = -0.43076;
146  lequi[1] = 0.43076;
147  }
148  else if (nbclass == 4) {
149  lequi[0] = -0.6745;
150  lequi[1] = 0;
151  lequi[2] = 0.6745;
152  }
153  else if (nbclass == 5) {
154  lequi[0] = -0.8416;
155  lequi[1] = -0.2533;
156  lequi[2] = 0.2533;
157  lequi[3] = 0.8416;
158  }
159  else if (nbclass == 6) {
160  lequi[0] = -0.9676;
161  lequi[1] = -0.43076;
162  lequi[2] = 0;
163  lequi[3] = 0.43076;
164  lequi[4] = 0.9676;
165  }
166  else if (nbclass == 7) {
167  lequi[0] = -1.068;
168  lequi[1] = -0.566;
169  lequi[2] = -0.18;
170  lequi[3] = 0.18;
171  lequi[4] = 0.566;
172  lequi[5] = 1.068;
173  }
174  else if (nbclass == 8) {
175  lequi[0] = -1.1507;
176  lequi[1] = -0.6745;
177  lequi[2] = -0.3187;
178  lequi[3] = 0;
179  lequi[4] = 0.3187;
180  lequi[5] = 0.6745;
181  lequi[6] = 1.1507;
182  }
183  else if (nbclass == 9) {
184  lequi[0] = -1.2208;
185  lequi[1] = -0.7648;
186  lequi[2] = -0.4385;
187  lequi[3] = -0.1397;
188  lequi[4] = 0.1397;
189  lequi[5] = 0.4385;
190  lequi[6] = 0.7648;
191  lequi[7] = 1.2208;
192  }
193  else if (nbclass == 10) {
194  lequi[0] = -1.28155;
195  lequi[1] = -0.84162;
196  lequi[2] = -0.5244;
197  lequi[3] = -0.25335;
198  lequi[4] = 0;
199  lequi[5] = 0.25335;
200  lequi[6] = 0.5244;
201  lequi[7] = 0.84162;
202  lequi[8] = 1.28155;
203  }
204  else {
206  ("Equiprobable classbreaks currently limited to 10 classes");
207  }
208 
209  basic_stats(data, count, &stats);
210 
211  /*check if any of the classbreaks would fall outside of the range min-max */
212  j = 0;
213  for (i = 0; i < *nbreaks; i++) {
214  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
215  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
216  j++;
217  }
218  }
219 
220  if (j < (*nbreaks)) {
221  G_warning(_("There are classbreaks outside the range min-max. Number of classes reduced to %i, but using probabilities for %i classes."),
222  j + 1, *nbreaks + 1);
223  G_realloc(classbreaks, j * sizeof(double));
224  for (i = 0; i < j; i++)
225  classbreaks[i] = 0;
226  }
227 
228  j = 0;
229  for (i = 0; i < *nbreaks; i++) {
230  if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
231  (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
232  classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
233  j++;
234  }
235  }
236 
237  *nbreaks = j;
238 
239  G_free(lequi);
240  return (1);
241 }
242 
243 /* FixMe: there seems to a problem with array overflow, probably due to the fact that the code was ported from fortran which has 1-based arrays*/
244 double class_discont(double *data, int count, int nbreaks,
245  double *classbreaks)
246 {
247  int *num, nbclass;
248  double *no, *zz, *nz, *xn, *co;
249  double *x; //Vecteur des observations standardisées
250  int i, j, k;
251  double min = 0, max = 0, rangemax = 0;
252  int n = 0;
253  double rangemin = 0, xlim = 0;
254  double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
255  int nf = 0, nmax = 0;
256  double *abc;
257  int nd = 0;
258  double den = 0, d = 0;
259  int im = 0, ji = 0;
260  int tmp = 0;
261  int nff = 0, jj = 0, no1 = 0, no2 = 0;
262  double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
263 
264 
265  /*get the number of values */
266  n = count;
267 
268  nbclass = nbreaks + 1;
269 
270  num = G_malloc((nbclass + 1) * sizeof(int));
271  no = G_malloc((nbclass + 1) * sizeof(double));
272  zz = G_malloc((nbclass + 1) * sizeof(double));
273  nz = G_malloc(3 * sizeof(double));
274  xn = G_malloc((n + 1) * sizeof(double));
275  co = G_malloc((nbclass + 1) * sizeof(double));
276 
277  /* We copy the array of values to x, in order to be able to standardize it */
278  x = G_malloc((n + 1) * sizeof(double));
279  x[0] = n;
280  xn[0] = 0;
281 
282  min = data[0];
283  max = data[count - 1];
284  for (i = 1; i <= n; i++)
285  x[i] = data[i - 1];
286 
287  rangemax = max - min;
288  rangemin = rangemax;
289 
290  for (i = 2; i <= n; i++) {
291  if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
292  rangemin = x[i] - x[i - 1]; /* rangemin = minimal distance */
293  }
294 
295  /* STANDARDIZATION
296  * and creation of the number vector (xn) */
297 
298  for (i = 1; i <= n; i++) {
299  x[i] = (x[i] - min) / rangemax;
300  xn[i] = i / (double)n;
301  }
302  xlim = rangemin / rangemax;
303  rangemin = rangemin / 2.0;
304 
305  /* Searching for the limits */
306  num[1] = n;
307  abc = G_malloc(3 * sizeof(double));
308 
309  /* Loop through possible solutions */
310  for (i = 1; i <= nbclass; i++) {
311  nmax = 0;
312  dmax = 0.0;
313  d2 = 0.0;
314  nf = 0; /*End number */
315 
316  /* Loop through classes */
317  for (j = 1; j <= i; j++) {
318  nd = nf; /*Start number */
319  nf = num[j];
320  co[j] = 10e37;
321  eqdrt(x, xn, nd, nf, abc);
322  den = sqrt(pow(abc[1], 2) + 1.0);
323  nd++;
324  /* Loop through observations */
325  for (k = nd; k <= nf; k++) {
326  if (abc[2] == 0.0)
327  d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
328  else
329  d = fabs(x[k] - abc[2]);
330  d2 += pow(d, 2);
331  if (x[k] - x[nd] < xlim)
332  continue;
333  if (x[nf] - x[k] < xlim)
334  continue;
335  if (d <= dmax)
336  continue;
337  dmax = d;
338  nmax = k;
339  }
340  nd--; /* A VERIFIER! */
341  if (x[nf] != x[nd]) {
342  if (nd != 0)
343  co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
344  else
345  co[j] = (xn[nf]) / (x[nf]); /* A VERIFIER! */
346  }
347  }
348  if (i == 1)
349  dd = d2;
350  p = d2 / dd;
351  for (j = 1; j <= i; j++) {
352  no[j] = num[j];
353  zz[j] = x[num[j]] * rangemax + min;
354  if (j == i)
355  continue;
356  if (co[j] > co[j + 1]) {
357  zz[j] = zz[j] + rangemin;
358  continue;
359  }
360  zz[j] = zz[j] - rangemin;
361  no[j] = no[j] - 1;
362  }
363  im = i - 1;
364  if (im != 0.0) {
365  for (j = 1; j <= im; j++) {
366  ji = i + 1 - j;
367  no[ji] -= no[ji - 1];
368  }
369  }
370  if (nmax == 0) {
371  break;
372  }
373  nff = i + 2;
374  tmp = 0;
375  for (j = 1; j <= i; j++) {
376  jj = nff - j;
377  if (num[jj - 1] < nmax) {
378  num[jj] = nmax;
379  tmp = 1;
380  break;
381  }
382  num[jj] = num[jj - 1];
383  }
384  if (tmp == 0) {
385  num[1] = nmax;
386  jj = 1;
387  }
388  if (jj == 1) {
389  xnj_1 = 0;
390  xj_1 = 0;
391  }
392  else {
393  xnj_1 = xn[num[jj - 1]];
394  xj_1 = x[num[jj - 1]];
395  }
396  no1 = (xn[num[jj]] - xnj_1) * n;
397  no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
398  f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
399  f *= n;
400  xt1 = (x[num[jj]] - xj_1) * f;
401  xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
402  if (xt2 == 0) {
403  xt2 = rangemin / 2.0 / rangemax * f;
404  xt1 -= xt2;
405  }
406  else if (xt1 * xt2 == 0) {
407  xt1 = rangemin / 2.0 / rangemax * f;
408  xt2 -= xt1;
409  }
410 
411  /* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
412  if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
413  chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
414 
415  }
416 
417  /* Fill up classbreaks of i <=nbclass classes */
418  for (j = 0; j <= (i - 1); j++)
419  classbreaks[j] = zz[j + 1];
420 
421  return (chi2);
422 }
423 
424 int class_frequencies(double *data, int count, int nbreaks,
425  double *classbreaks, int *frequencies)
426 {
427  int i, j;
428  double min, max;
429 
430  min = data[0];
431  max = data[count - 1];
432  /* count cases in all classes, except for last class */
433  i = 0;
434  for (j = 0; j < nbreaks; j++) {
435  while (data[i] <= classbreaks[j]) {
436  frequencies[j]++;
437  i++;
438  }
439  }
440 
441  /*Now count cases in last class */
442  for (i = i; i < count; i++) {
443  frequencies[nbreaks]++;
444  }
445 
446  return (1);
447 }
int G_strcasecmp(const char *x, const char *y)
String compare ignoring case (upper or lower)
Definition: strings.c:192
double class_apply_algorithm(char *algo, double *data, int nrec, int *nbreaks, double *classbreaks)
Definition: class.c:6
void G_free(void *buf)
Free allocated memory.
Definition: gis/alloc.c:142
void eqdrt(double vectx[], double vecty[], int i1, int i2, double *vabc)
Definition: basic.c:40
#define min(x, y)
Definition: draw2.c:68
int class_interval(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:31
int count
long num
Definition: g3dcats.c:93
void basic_stats(double *data, int count, struct GASTATS *stats)
Definition: basic.c:6
#define max(x, y)
Definition: draw2.c:69
int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
Definition: class.c:125
double class_discont(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:244
double class_stdev(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:48
tuple data
G_warning("category support for [%s] in mapset [%s] %s", name, mapset, type)
int G_fatal_error(const char *msg,...)
Print a fatal error message to stderr.
DCELL dmax
Definition: g3dcolor.c:53
int n
Definition: dataquad.c:291
int class_quant(double *data, int count, int nbreaks, double *classbreaks)
Definition: class.c:112
int class_frequencies(double *data, int count, int nbreaks, double *classbreaks, int *frequencies)
Definition: class.c:424