16
16
** You should have received a copy of the GNU General Public License
17
17
** along with tsinfer. If not, see <http://www.gnu.org/licenses/>.
18
18
*/
19
+ /* It's not worth trying to get mmap'd genotypes working on windows,
20
+ * and is just a silent no-op if it's tried.
21
+ */
22
+ #if defined(_WIN32 )
23
+ #else
24
+ /* Needed for ftruncate */
25
+ #define _XOPEN_SOURCE 700
26
+ #define MMAP_GENOTYPES 1
27
+ #endif
19
28
20
29
#include "tsinfer.h"
21
30
#include "err.h"
25
34
#include <string.h>
26
35
#include <stdbool.h>
27
36
37
+ #include <errno.h>
38
+ #include <fcntl.h>
39
+ #include <stdlib.h>
40
+ #include <string.h>
41
+
42
+ #ifdef MMAP_GENOTYPES
43
+ #include <sys/mman.h>
44
+ #include <unistd.h>
45
+ #include <sys/types.h>
46
+ #endif
47
+
28
48
#include "avl.h"
29
49
30
50
/* Note: using an unoptimised version of bit packing here because we're
@@ -135,6 +155,7 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out)
135
155
136
156
fprintf (out , "Ancestor builder\n" );
137
157
fprintf (out , "flags = %d\n" , (int ) self -> flags );
158
+ fprintf (out , "mmap_fd = %d\n" , self -> mmap_fd );
138
159
fprintf (out , "num_samples = %d\n" , (int ) self -> num_samples );
139
160
fprintf (out , "num_sites = %d\n" , (int ) self -> num_sites );
140
161
fprintf (out , "num_ancestors = %d\n" , (int ) self -> num_ancestors );
@@ -181,23 +202,62 @@ ancestor_builder_print_state(ancestor_builder_t *self, FILE *out)
181
202
return 0 ;
182
203
}
183
204
184
- int
185
- ancestor_builder_alloc (
186
- ancestor_builder_t * self , size_t num_samples , size_t max_sites , int flags )
205
+ #ifdef MMAP_GENOTYPES
206
+
207
+ static int
208
+ ancestor_builder_make_genotype_mmap (ancestor_builder_t * self )
187
209
{
210
+
188
211
int ret = 0 ;
189
- unsigned long max_size = 1024 * 1024 ;
190
212
191
- memset ( self , 0 , sizeof ( ancestor_builder_t )) ;
192
- if (num_samples <= 1 ) {
193
- ret = TSI_ERR_BAD_NUM_SAMPLES ;
213
+ self -> mmap_size = self -> max_sites * self -> encoded_genotypes_size ;
214
+ if (ftruncate ( self -> mmap_fd , ( off_t ) self -> mmap_size ) != 0 ) {
215
+ ret = TSI_ERR_IO ;
194
216
goto out ;
195
217
}
218
+ self -> mmap_buffer = mmap (
219
+ NULL , self -> mmap_size , PROT_READ | PROT_WRITE , MAP_SHARED , self -> mmap_fd , 0 );
220
+ if (self -> mmap_buffer == MAP_FAILED ) {
221
+ self -> mmap_buffer = NULL ;
222
+ ret = TSI_ERR_IO ;
223
+ goto out ;
224
+ }
225
+ self -> mmap_offset = 0 ;
226
+ out :
227
+ return ret ;
228
+ }
196
229
230
+ static int
231
+ ancestor_builder_free_genotype_mmap (ancestor_builder_t * self )
232
+ {
233
+ if (self -> mmap_buffer != NULL ) {
234
+ /* There's nothing we can do about it here, so don't check errors. */
235
+ munmap (self -> mmap_buffer , self -> mmap_size );
236
+ }
237
+ /* Try to truncate to zero so we don't flush out all the data */
238
+ ftruncate (self -> mmap_fd , 0 );
239
+ return 0 ;
240
+ }
241
+ #endif
242
+
243
+ int
244
+ ancestor_builder_alloc (ancestor_builder_t * self , size_t num_samples , size_t max_sites ,
245
+ int mmap_fd , int flags )
246
+ {
247
+ int ret = 0 ;
248
+ unsigned long max_size = 1024 * 1024 ;
249
+
250
+ memset (self , 0 , sizeof (ancestor_builder_t ));
197
251
self -> num_samples = num_samples ;
198
252
self -> max_sites = max_sites ;
253
+ self -> mmap_fd = mmap_fd ;
199
254
self -> num_sites = 0 ;
200
255
self -> flags = flags ;
256
+
257
+ if (num_samples <= 1 ) {
258
+ ret = TSI_ERR_BAD_NUM_SAMPLES ;
259
+ goto out ;
260
+ }
201
261
if (self -> flags & TSI_GENOTYPE_ENCODING_ONE_BIT ) {
202
262
self -> encoded_genotypes_size = (num_samples / 8 ) + ((num_samples % 8 ) != 0 );
203
263
self -> decoded_genotypes_size = self -> encoded_genotypes_size * 8 ;
@@ -228,6 +288,14 @@ ancestor_builder_alloc(
228
288
if (ret != 0 ) {
229
289
goto out ;
230
290
}
291
+ #if MMAP_GENOTYPES
292
+ if (self -> mmap_fd != -1 ) {
293
+ ret = ancestor_builder_make_genotype_mmap (self );
294
+ if (ret != 0 ) {
295
+ goto out ;
296
+ }
297
+ }
298
+ #endif
231
299
avl_init_tree (& self -> time_map , cmp_time_map , NULL );
232
300
out :
233
301
return ret ;
@@ -236,13 +304,19 @@ ancestor_builder_alloc(
236
304
size_t
237
305
ancestor_builder_get_memsize (const ancestor_builder_t * self )
238
306
{
239
- /* Ignore the other allocs as insignificant */
307
+ /* Ignore the other allocs as insignificant, and don't report the
308
+ * size of the mmap'd region */
240
309
return self -> main_allocator .total_size + self -> indexing_allocator .total_size ;
241
310
}
242
311
243
312
int
244
313
ancestor_builder_free (ancestor_builder_t * self )
245
314
{
315
+ #if MMAP_GENOTYPES
316
+ if (self -> mmap_fd != -1 ) {
317
+ ancestor_builder_free_genotype_mmap (self );
318
+ }
319
+ #endif
246
320
tsi_safe_free (self -> sites );
247
321
tsi_safe_free (self -> descriptors );
248
322
tsk_safe_free (self -> genotype_encode_buffer );
@@ -558,7 +632,18 @@ ancestor_builder_encode_genotypes(
558
632
static uint8_t *
559
633
ancestor_builder_allocate_genotypes (ancestor_builder_t * self )
560
634
{
561
- return tsk_blkalloc_get (& self -> main_allocator , self -> encoded_genotypes_size );
635
+ uint8_t * ret = NULL ;
636
+ void * p ;
637
+
638
+ if (self -> mmap_buffer == NULL ) {
639
+ ret = tsk_blkalloc_get (& self -> main_allocator , self -> encoded_genotypes_size );
640
+ } else {
641
+ p = (char * ) self -> mmap_buffer + self -> mmap_offset ;
642
+ self -> mmap_offset += self -> encoded_genotypes_size ;
643
+ assert (self -> mmap_offset <= self -> mmap_size );
644
+ ret = (uint8_t * ) p ;
645
+ }
646
+ return ret ;
562
647
}
563
648
564
649
int WARN_UNUSED
0 commit comments