2005-04-17 Thomas Koenig <Thomas.Koenig@online.de>
[gcc.git] / libgfortran / intrinsics / pack_generic.c
1 /* Generic implementation of the PACK intrinsic
2 Copyright (C) 2002, 2004 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 In addition to the permissions in the GNU General Public License, the
13 Free Software Foundation gives you unlimited permission to link the
14 compiled version of this file into combinations with other programs,
15 and to distribute those combinations without any restriction coming
16 from the use of this file. (The General Public License restrictions
17 do apply in other respects; for example, they cover modification of
18 the file, and distribution when not linked into a combine
19 executable.)
20
21 Libgfortran is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
25
26 You should have received a copy of the GNU General Public
27 License along with libgfortran; see the file COPYING. If not,
28 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
29 Boston, MA 02111-1307, USA. */
30
31 #include "config.h"
32 #include <stdlib.h>
33 #include <assert.h>
34 #include <string.h>
35 #include "libgfortran.h"
36
37 /* PACK is specified as follows:
38
39 13.14.80 PACK (ARRAY, MASK, [VECTOR])
40
41 Description: Pack an array into an array of rank one under the
42 control of a mask.
43
44 Class: Transformational function.
45
46 Arguments:
47 ARRAY may be of any type. It shall not be scalar.
48 MASK shall be of type LOGICAL. It shall be conformable with ARRAY.
49 VECTOR (optional) shall be of the same type and type parameters
50 as ARRAY. VECTOR shall have at least as many elements as
51 there are true elements in MASK. If MASK is a scalar
52 with the value true, VECTOR shall have at least as many
53 elements as there are in ARRAY.
54
55 Result Characteristics: The result is an array of rank one with the
56 same type and type parameters as ARRAY. If VECTOR is present, the
57 result size is that of VECTOR; otherwise, the result size is the
58 number /t/ of true elements in MASK unless MASK is scalar with the
59 value true, in which case the result size is the size of ARRAY.
60
61 Result Value: Element /i/ of the result is the element of ARRAY
62 that corresponds to the /i/th true element of MASK, taking elements
63 in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is
64 present and has size /n/ > /t/, element /i/ of the result has the
65 value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/.
66
67 Examples: The nonzero elements of an array M with the value
68 | 0 0 0 |
69 | 9 0 0 | may be "gathered" by the function PACK. The result of
70 | 0 0 7 |
71 PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0,
72 VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12].
73
74 There are two variants of the PACK intrinsic: one, where MASK is
75 array valued, and the other one where MASK is scalar. */
76
77 extern void pack (gfc_array_char *, const gfc_array_char *,
78 const gfc_array_l4 *, const gfc_array_char *);
79 export_proto(pack);
80
81 void
82 pack (gfc_array_char *ret, const gfc_array_char *array,
83 const gfc_array_l4 *mask, const gfc_array_char *vector)
84 {
85 /* r.* indicates the return array. */
86 index_type rstride0;
87 char *rptr;
88 /* s.* indicates the source array. */
89 index_type sstride[GFC_MAX_DIMENSIONS];
90 index_type sstride0;
91 const char *sptr;
92 /* m.* indicates the mask array. */
93 index_type mstride[GFC_MAX_DIMENSIONS];
94 index_type mstride0;
95 const GFC_LOGICAL_4 *mptr;
96
97 index_type count[GFC_MAX_DIMENSIONS];
98 index_type extent[GFC_MAX_DIMENSIONS];
99 index_type n;
100 index_type dim;
101 index_type size;
102 index_type nelem;
103
104 size = GFC_DESCRIPTOR_SIZE (array);
105 dim = GFC_DESCRIPTOR_RANK (array);
106 for (n = 0; n < dim; n++)
107 {
108 count[n] = 0;
109 extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
110 sstride[n] = array->dim[n].stride * size;
111 mstride[n] = mask->dim[n].stride;
112 }
113 if (sstride[0] == 0)
114 sstride[0] = size;
115 if (mstride[0] == 0)
116 mstride[0] = 1;
117
118 sptr = array->data;
119 mptr = mask->data;
120
121 /* Use the same loop for both logical types. */
122 if (GFC_DESCRIPTOR_SIZE (mask) != 4)
123 {
124 if (GFC_DESCRIPTOR_SIZE (mask) != 8)
125 runtime_error ("Funny sized logical array");
126 for (n = 0; n < dim; n++)
127 mstride[n] <<= 1;
128 mstride0 <<= 1;
129 mptr = GFOR_POINTER_L8_TO_L4 (mptr);
130 }
131
132 if (ret->data == NULL)
133 {
134 /* Allocate the memory for the result. */
135 int total;
136
137 if (vector != NULL)
138 {
139 /* The return array will have as many
140 elements as there are in VECTOR. */
141 total = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
142 }
143 else
144 {
145 /* We have to count the true elements in MASK. */
146
147 /* TODO: We could speed up pack easily in the case of only
148 few .TRUE. entries in MASK, by keeping track of where we
149 would be in the source array during the initial traversal
150 of MASK, and caching the pointers to those elements. Then,
151 supposed the number of elements is small enough, we would
152 only have to traverse the list, and copy those elements
153 into the result array. In the case of datatypes which fit
154 in one of the integer types we could also cache the
155 value instead of a pointer to it.
156 This approach might be bad from the point of view of
157 cache behavior in the case where our cache is not big
158 enough to hold all elements that have to be copied. */
159
160 const GFC_LOGICAL_4 *m = mptr;
161
162 total = 0;
163
164 while (m)
165 {
166 /* Test this element. */
167 if (*m)
168 total++;
169
170 /* Advance to the next element. */
171 m += mstride[0];
172 count[0]++;
173 n = 0;
174 while (count[n] == extent[n])
175 {
176 /* When we get to the end of a dimension, reset it
177 and increment the next dimension. */
178 count[n] = 0;
179 /* We could precalculate this product, but this is a
180 less frequently used path so proabably not worth
181 it. */
182 m -= mstride[n] * extent[n];
183 n++;
184 if (n >= dim)
185 {
186 /* Break out of the loop. */
187 m = NULL;
188 break;
189 }
190 else
191 {
192 count[n]++;
193 mptr += mstride[n];
194 }
195 }
196 }
197 }
198
199 /* Setup the array descriptor. */
200 ret->dim[0].lbound = 0;
201 ret->dim[0].ubound = total - 1;
202 ret->dim[0].stride = 1;
203
204 ret->data = internal_malloc_size (size * total);
205 ret->base = 0;
206
207 if (total == 0)
208 /* In this case, nothing remains to be done. */
209 return;
210 }
211
212 rstride0 = ret->dim[0].stride * size;
213 if (rstride0 == 0)
214 rstride0 = size;
215 sstride0 = sstride[0];
216 mstride0 = mstride[0];
217 rptr = ret->data;
218
219 while (sptr)
220 {
221 /* Test this element. */
222 if (*mptr)
223 {
224 /* Add it. */
225 memcpy (rptr, sptr, size);
226 rptr += rstride0;
227 }
228 /* Advance to the next element. */
229 sptr += sstride0;
230 mptr += mstride0;
231 count[0]++;
232 n = 0;
233 while (count[n] == extent[n])
234 {
235 /* When we get to the end of a dimension, reset it and increment
236 the next dimension. */
237 count[n] = 0;
238 /* We could precalculate these products, but this is a less
239 frequently used path so proabably not worth it. */
240 sptr -= sstride[n] * extent[n];
241 mptr -= mstride[n] * extent[n];
242 n++;
243 if (n >= dim)
244 {
245 /* Break out of the loop. */
246 sptr = NULL;
247 break;
248 }
249 else
250 {
251 count[n]++;
252 sptr += sstride[n];
253 mptr += mstride[n];
254 }
255 }
256 }
257
258 /* Add any remaining elements from VECTOR. */
259 if (vector)
260 {
261 n = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
262 nelem = ((rptr - ret->data) / rstride0);
263 if (n > nelem)
264 {
265 sstride0 = vector->dim[0].stride * size;
266 if (sstride0 == 0)
267 sstride0 = size;
268
269 sptr = vector->data + sstride0 * nelem;
270 n -= nelem;
271 while (n--)
272 {
273 memcpy (rptr, sptr, size);
274 rptr += rstride0;
275 sptr += sstride0;
276 }
277 }
278 }
279 }
280
281 extern void pack_s (gfc_array_char *ret, const gfc_array_char *array,
282 const GFC_LOGICAL_4 *, const gfc_array_char *);
283 export_proto(pack_s);
284
285 void
286 pack_s (gfc_array_char *ret, const gfc_array_char *array,
287 const GFC_LOGICAL_4 *mask, const gfc_array_char *vector)
288 {
289 /* r.* indicates the return array. */
290 index_type rstride0;
291 char *rptr;
292 /* s.* indicates the source array. */
293 index_type sstride[GFC_MAX_DIMENSIONS];
294 index_type sstride0;
295 const char *sptr;
296
297 index_type count[GFC_MAX_DIMENSIONS];
298 index_type extent[GFC_MAX_DIMENSIONS];
299 index_type n;
300 index_type dim;
301 index_type size;
302 index_type nelem;
303
304 size = GFC_DESCRIPTOR_SIZE (array);
305 dim = GFC_DESCRIPTOR_RANK (array);
306 for (n = 0; n < dim; n++)
307 {
308 count[n] = 0;
309 extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
310 sstride[n] = array->dim[n].stride * size;
311 }
312 if (sstride[0] == 0)
313 sstride[0] = size;
314
315 sstride0 = sstride[0];
316 sptr = array->data;
317
318 if (ret->data == NULL)
319 {
320 /* Allocate the memory for the result. */
321 int total;
322
323 if (vector != NULL)
324 {
325 /* The return array will have as many elements as there are
326 in vector. */
327 total = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
328 }
329 else
330 {
331 if (*mask)
332 {
333 /* The result array will have as many elements as the input
334 array. */
335 total = extent[0];
336 for (n = 1; n < dim; n++)
337 total *= extent[n];
338 }
339 else
340 {
341 /* The result array will be empty. */
342 ret->dim[0].lbound = 0;
343 ret->dim[0].ubound = -1;
344 ret->dim[0].stride = 1;
345 ret->data = internal_malloc_size (0);
346 ret->base = 0;
347
348 return;
349 }
350 }
351
352 /* Setup the array descriptor. */
353 ret->dim[0].lbound = 0;
354 ret->dim[0].ubound = total - 1;
355 ret->dim[0].stride = 1;
356
357 ret->data = internal_malloc_size (size * total);
358 ret->base = 0;
359 }
360
361 rstride0 = ret->dim[0].stride * size;
362 if (rstride0 == 0)
363 rstride0 = size;
364 rptr = ret->data;
365
366 /* The remaining possibilities are now:
367 If MASK is .TRUE., we have to copy the source array into the
368 result array. We then have to fill it up with elements from VECTOR.
369 If MASK is .FALSE., we have to copy VECTOR into the result
370 array. If VECTOR were not present we would have already returned. */
371
372 if (*mask)
373 {
374 while (sptr)
375 {
376 /* Add this element. */
377 memcpy (rptr, sptr, size);
378 rptr += rstride0;
379
380 /* Advance to the next element. */
381 sptr += sstride0;
382 count[0]++;
383 n = 0;
384 while (count[n] == extent[n])
385 {
386 /* When we get to the end of a dimension, reset it and
387 increment the next dimension. */
388 count[n] = 0;
389 /* We could precalculate these products, but this is a
390 less frequently used path so proabably not worth it. */
391 sptr -= sstride[n] * extent[n];
392 n++;
393 if (n >= dim)
394 {
395 /* Break out of the loop. */
396 sptr = NULL;
397 break;
398 }
399 else
400 {
401 count[n]++;
402 sptr += sstride[n];
403 }
404 }
405 }
406 }
407
408 /* Add any remaining elements from VECTOR. */
409 if (vector)
410 {
411 n = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
412 nelem = ((rptr - ret->data) / rstride0);
413 if (n > nelem)
414 {
415 sstride0 = vector->dim[0].stride * size;
416 if (sstride0 == 0)
417 sstride0 = size;
418
419 sptr = vector->data + sstride0 * nelem;
420 n -= nelem;
421 while (n--)
422 {
423 memcpy (rptr, sptr, size);
424 rptr += rstride0;
425 sptr += sstride0;
426 }
427 }
428 }
429 }