/***************************************
 Merge sort functions.

 Part of the Routino routing software.
 ******************/ /******************
 This file Copyright 2009-2015, 2017, 2019, 2023 Andrew M. Bishop

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ***************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined(USE_PTHREADS) && USE_PTHREADS
#include <pthread.h>
#endif

#include "types.h"

#include "logging.h"
#include "files.h"
#include "sorting.h"

/*+ Enable debugging print statements +*/
#define DEBUG 0

/* Global variables */

/*+ The command line '--tmpdir' option or its default value. +*/
extern char *option_tmpdirname;

/*+ The amount of RAM to use for filesorting. +*/
extern size_t option_filesort_ramsize;

/*+ The number of filesorting threads allowed. +*/
extern int option_filesort_threads;


/* Thread data type definitions */

/*+ A data type for holding the data for one sorting job; one of these exists for each
    thread slot (a single slot is used when sorting is not threaded). +*/
typedef struct _thread_data
 {
#if defined(USE_PTHREADS) && USE_PTHREADS

  pthread_t thread;             /*+ The thread identifier. +*/

  int       running;            /*+ A flag indicating the current state of the thread: 0 = slot is free, 1 = thread started, 2 = thread finished and waiting to be joined. +*/

#endif

  char     *data;               /*+ The main data array. +*/
  void    **datap;              /*+ An array of pointers to the data objects. +*/
  size_t    n;                  /*+ The number of pointers. +*/

  int       fd;                 /*+ The file descriptor of the file to write the results to (initialised to -1 meaning no file). +*/

  size_t    itemsize;           /*+ The size of each item (set to zero for variable length items). +*/
  int     (*compare)(const void*,const void*); /*+ The comparison function. +*/
 }
 thread_data;

/* Thread variables and functions */

#if defined(USE_PTHREADS) && USE_PTHREADS

static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  running_cond  = PTHREAD_COND_INITIALIZER;

static int wait_for_free_thread(thread_data *threads,int nthreads,int *threads_running);
static void wait_for_all_threads(thread_data *threads,int nthreads,int *threads_running);

#endif

/* Thread helper functions */

static void *filesort_heapsort_thread(thread_data *thread);

/* Local functions */

static index_t filesort_merge(int fd_out,int nfiles,size_t itemsize,size_t largestitemsize,
                              int(*compare_function)(const void*, const void*),
                              int (*post_sort_function)(void*,index_t));


/*++++++++++++++++++++++++++++++++++++++
  A function to sort the contents of a file of fixed length objects using a
  limited amount of RAM.

  The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort
  and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting.
  The individual sort steps and the merge step both use a "Heap sort"
  http://en.wikipedia.org/wiki/Heapsort.  The combination of the two should work well
  if the data is already partially sorted.

  If all of the data fits into the RAM buffer in one pass then it is sorted and
  written directly to the output file.  Otherwise each buffer full of data is sorted
  and written to a temporary file ("filesort.N.tmp" in the temporary directory) and
  the temporary files are merged into the output file at the end.

  index_t filesort_fixed Returns the number of objects kept.

  int fd_in The file descriptor of the input file (opened for reading and at the beginning).

  int fd_out The file descriptor of the output file (opened for writing and empty).

  size_t itemsize The size of each item in the file that needs sorting (must not be zero).

  int (*pre_sort_function)(void *,index_t) If non-NULL then this function is called for
     each item before they have been sorted.  The second parameter is the number of objects
     previously read from the input file.  If the function returns 1 then the object is kept
     and it is sorted, otherwise it is ignored.

  int (*compare_function)(const void*, const void*) The comparison function.  This is identical
     to qsort if the data to be sorted is an array of things not pointers.

  int (*post_sort_function)(void *,index_t) If non-NULL then this function is called for
     each item after they have been sorted.  The second parameter is the number of objects
     already written to the output file.  If the function returns 1 then the object is written
     to the output file, otherwise it is ignored.
  ++++++++++++++++++++++++++++++++++++++*/

index_t filesort_fixed(int fd_in,int fd_out,size_t itemsize,int (*pre_sort_function)(void*,index_t),
                                                            int (*compare_function)(const void*,const void*),
                                                            int (*post_sort_function)(void*,index_t))
{
 int nfiles=0;
 index_t count_out=0,count_in=0,total=0;
 size_t nitems,item;
 thread_data *threads;
 int nthreads,i,more=1;
 char *filename;
#if defined(USE_PTHREADS) && USE_PTHREADS
 int threads_running=0;
#endif

 /* Check for an empty input file before anything is allocated (so that nothing is leaked) */

 nitems=(size_t)SizeFileFD(fd_in)/itemsize;

 if(nitems==0)
    return(0);

 /* Allocate the RAM buffer and other bits */

 filename=(char*)malloc_logassert(strlen(option_tmpdirname)+24);

 filename[0]=0;                 /* an empty name means that no temporary file has been written */

 if(option_filesort_threads==1)
    nthreads = 1;
 else if((nitems*(itemsize+sizeof(void*)))<option_filesort_ramsize)
    nthreads = 1 + (nitems*(itemsize+sizeof(void*))) / (option_filesort_ramsize/option_filesort_threads);
 else
    nthreads = option_filesort_threads;

 if((nitems*(itemsize+sizeof(void*)))>option_filesort_ramsize)
    nitems=option_filesort_ramsize/(itemsize+sizeof(void*));

 nitems=1+nitems/nthreads;

#if DEBUG
 printf("DEBUG: filesort_fixed nitems=%lu option_ramsize/option_threads=%lu => nitems=%lu datasize=%lu nthreads=%d\n",
        (size_t)SizeFileFD(fd_in)/itemsize,option_filesort_ramsize/option_filesort_threads,nitems,nitems*(itemsize+sizeof(void*)),nthreads);
#endif

 threads=(thread_data*)calloc_logassert(nthreads,sizeof(thread_data));

 for(i=0;i<nthreads;i++)
   {
    threads[i].fd=-1;

    threads[i].data=malloc_logassert(nitems*itemsize);
    threads[i].datap=malloc_logassert(nitems*sizeof(void*));

    log_malloc(threads[i].data ,nitems*itemsize);
    log_malloc(threads[i].datap,nitems*sizeof(void*));

    threads[i].itemsize=itemsize;
    threads[i].compare=compare_function;

#if defined(USE_PTHREADS) && USE_PTHREADS
    threads[i].running=0;
#endif
   }

 /* Loop around, fill the buffer, sort the data and write a temporary file */

 do
   {
    int thread=0;

#if defined(USE_PTHREADS) && USE_PTHREADS

    /* Choose a thread to use */

    if(nthreads>1)
       thread=wait_for_free_thread(threads,nthreads,&threads_running);

#endif

    /* Read in the data and create pointers (a rejected item is overwritten by the next one read) */

    for(item=0;item<nitems;)
      {
       threads[thread].datap[item]=threads[thread].data+item*itemsize;

       if(ReadFileBuffered(fd_in,threads[thread].datap[item],itemsize))
         {
          more=0;
          break;
         }

       if(!pre_sort_function || pre_sort_function(threads[thread].datap[item],count_in))
         {
          item++;
          total++;
         }

       count_in++;
      }

#if DEBUG
    printf("DEBUG: filesort_fixed thread=%d file=%d item=%lu total=%u more=%d\n",thread,nfiles,item,total,more);
#endif

    /* Shortcut if there is no previous data and no more data (i.e. no data at all) */

    if(more==0 && total==0)
       goto tidy_and_exit;

    /* No new data read in this time round */

    if(item==0)
       break;

    /* Update the number of items to sort */

    threads[thread].n=item;

    /* Shortcut if only one file, don't write to disk */

    if(more==0 && nfiles==0)
      {
       filename[0]=0;

       filesort_heapsort_thread(&threads[thread]);
      }
    else
      {
       /* Create the file descriptor (not thread-safe) */

       sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles);

       threads[thread].fd=OpenFileBufferedNew(filename);

       if(nthreads==1)
         {
          filesort_heapsort_thread(&threads[thread]);

          CloseFileBuffered(threads[thread].fd);
         }

#if defined(USE_PTHREADS) && USE_PTHREADS

       else
         {
          threads[thread].running=1;

          if(pthread_create(&threads[thread].thread,NULL,(void* (*)(void*))filesort_heapsort_thread,&threads[thread])==0)
             threads_running++;
          else
            {
             /* The thread could not be started; free the slot and do the work in this
                thread instead (otherwise the slot would never be marked as finished). */

             threads[thread].running=0;

             filesort_heapsort_thread(&threads[thread]);

             CloseFileBuffered(threads[thread].fd);
            }
         }

#endif

      }

    nfiles++;
   }
 while(more);

#if defined(USE_PTHREADS) && USE_PTHREADS

 /* Wait for all of the threads to finish */

 if(nthreads>1)
    wait_for_all_threads(threads,nthreads,&threads_running);

#endif

 /* Shortcut if there are no files */

 if(nfiles==0)
    goto tidy_and_exit;

 /* Shortcut if there is only one set of data and it was never written to disk (lucky for
    us we still have the data in RAM).  If a temporary file was written then a later read
    pass that kept nothing may have overwritten the RAM buffer so the file is merged below. */

 if(nfiles==1 && !filename[0])
   {
    for(item=0;item<threads[0].n;item++)
      {
       if(!post_sort_function || post_sort_function(threads[0].datap[item],count_out))
         {
          WriteFileBuffered(fd_out,threads[0].datap[item],itemsize);
          count_out++;
         }
      }

    goto tidy_and_exit;
   }

 /* Open all of the temporary files and perform an n-way merge using a binary heap
    (the merge function also deletes the temporary files) */

 count_out=filesort_merge(fd_out,nfiles,itemsize,0,compare_function,post_sort_function);

 /* Tidy up */

 tidy_and_exit:

 for(i=0;i<nthreads;i++)
   {
    log_free(threads[i].data);
    log_free(threads[i].datap);

    free(threads[i].data);
    free(threads[i].datap);
   }

 free(threads);

 free(filename);

 return(count_out);
}


/*++++++++++++++++++++++++++++++++++++++
  A function to sort the contents of a file of variable length objects (each
  preceded by its length in FILESORT_VARSIZE bytes) using a limited amount of RAM.

  The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort
  and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting.
  The individual sort steps and the merge step both use a "Heap sort"
  http://en.wikipedia.org/wiki/Heapsort.  The combination of the two should work well
  if the data is already partially sorted.

  Each RAM buffer is filled from both ends; the items (each preceded by its length) are
  stored from the start of the buffer upwards and the pointers to the items are stored
  from the end of the buffer downwards.

  index_t filesort_vary Returns the number of objects kept.

  int fd_in The file descriptor of the input file (opened for reading and at the beginning).

  int fd_out The file descriptor of the output file (opened for writing and empty).

  int (*pre_sort_function)(void *,index_t) If non-NULL then this function is called for
     each item before they have been sorted.  The second parameter is the number of objects
     previously read from the input file.  If the function returns 1 then the object is kept
     and it is sorted, otherwise it is ignored.

  int (*compare_function)(const void*, const void*) The comparison function.  This is identical
     to qsort if the data to be sorted is an array of things not pointers.

  int (*post_sort_function)(void *,index_t) If non-NULL then this function is called for
     each item after they have been sorted.  The second parameter is the number of objects
     already written to the output file.  If the function returns 1 then the object is written
     to the output file, otherwise it is ignored.
  ++++++++++++++++++++++++++++++++++++++*/

index_t filesort_vary(int fd_in,int fd_out,int (*pre_sort_function)(void*,index_t),
                                           int (*compare_function)(const void*,const void*),
                                           int (*post_sort_function)(void*,index_t))
{
 int nfiles=0;
 index_t count_out=0,count_in=0,total=0;
 size_t datasize,item;
 FILESORT_VARINT nextitemsize,largestitemsize=0;
 thread_data *threads;
 int nthreads,i,more=1;
 char *filename;
#if defined(USE_PTHREADS) && USE_PTHREADS
 int threads_running=0;
#endif

 /* Check for an empty input file before anything is allocated (so that nothing is leaked) */

 datasize=(size_t)SizeFileFD(fd_in);

 if(datasize==0)
    return(0);

 /* Allocate the RAM buffer and other bits */

 filename=(char*)malloc_logassert(strlen(option_tmpdirname)+24);

 filename[0]=0;                 /* an empty name means that no temporary file has been written */

 /* We can not know in advance how many data items there are.  Each
    one will require RAM for data, FILESORT_VARALIGN and sizeof(void*)
    Assume that data+FILESORT_VARALIGN+sizeof(void*) is 2*data (tests
    show values of between 1.2 and 2.5). */

 datasize=datasize*2;

 if(option_filesort_threads==1)
    nthreads = 1;
 else if(datasize<option_filesort_ramsize)
    nthreads = 1 + datasize / (option_filesort_ramsize/option_filesort_threads);
 else
    nthreads = option_filesort_threads;

 if(datasize>option_filesort_ramsize)
    datasize=option_filesort_ramsize;

 datasize=datasize/nthreads;

 datasize=FILESORT_VARALIGN*((datasize+FILESORT_VARALIGN-1)/FILESORT_VARALIGN);

#if DEBUG
 printf("DEBUG: filesort_vary datasize=%lu option_ramsize/option_threads=%lu => datasize=%lu nthreads=%d\n",
        2*(size_t)SizeFileFD(fd_in),option_filesort_ramsize/option_filesort_threads,datasize,nthreads);
#endif

 threads=(thread_data*)calloc_logassert(nthreads,sizeof(thread_data));

 for(i=0;i<nthreads;i++)
   {
    threads[i].fd=-1;

    threads[i].data=malloc_logassert(datasize);
    threads[i].datap=NULL;

    log_malloc(threads[i].data,datasize);

    threads[i].itemsize=0;
    threads[i].compare=compare_function;

#if defined(USE_PTHREADS) && USE_PTHREADS
    threads[i].running=0;
#endif
   }

 /* Loop around, fill the buffer, sort the data and write a temporary file */

 if(ReadFileBuffered(fd_in,&nextitemsize,FILESORT_VARSIZE))    /* Always have the next item size known in advance */
    goto tidy_and_exit;

 do
   {
    size_t ramused=FILESORT_VARALIGN-FILESORT_VARSIZE;
    int thread=0;

#if defined(USE_PTHREADS) && USE_PTHREADS

    /* Choose a thread to use */

    if(nthreads>1)
       thread=wait_for_free_thread(threads,nthreads,&threads_running);

#endif

    /* The pointers are stored downwards from the end of the buffer */

    threads[thread].datap=(void**)(threads[thread].data+datasize);

    item=0;

    /* Read in the data and create pointers (while there is room for the item and its pointer) */

    while((ramused+FILESORT_VARSIZE+nextitemsize)<=(size_t)((char*)threads[thread].datap-sizeof(void*)-threads[thread].data))
      {
       FILESORT_VARINT itemsize=nextitemsize;

       *(FILESORT_VARINT*)(threads[thread].data+ramused)=itemsize;

       ramused+=FILESORT_VARSIZE;

       ReadFileBuffered(fd_in,threads[thread].data+ramused,itemsize);

       if(!pre_sort_function || pre_sort_function(threads[thread].data+ramused,count_in))
         {
          *--threads[thread].datap=threads[thread].data+ramused; /* points to real data */

          if(itemsize>largestitemsize)
             largestitemsize=itemsize;

          ramused+=itemsize;

          /* Position the next size so that the data following it is aligned */

          ramused =FILESORT_VARALIGN*((ramused+FILESORT_VARALIGN-1)/FILESORT_VARALIGN);
          ramused+=FILESORT_VARALIGN-FILESORT_VARSIZE;

          total++;
          item++;
         }
       else
          ramused-=FILESORT_VARSIZE; /* rejected, the next item will overwrite it */

       count_in++;

       if(ReadFileBuffered(fd_in,&nextitemsize,FILESORT_VARSIZE))
         {
          more=0;
          break;
         }
      }

#if DEBUG
    printf("DEBUG: filesort_vary thread=%d file=%d item=%lu total=%u ramused=%lu more=%d\n",thread,nfiles,item,total,ramused+item*sizeof(void*),more);
#endif

    /* No new data read in this time round */

    if(item==0)
       break;

    /* Update the number of items to sort */

    threads[thread].n=item;

    /* Shortcut if only one file, don't write to disk */

    if(more==0 && nfiles==0)
      {
       filename[0]=0;

       filesort_heapsort_thread(&threads[thread]);
      }
    else
      {
       /* Create the file descriptor (not thread-safe) */

       sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles);

       threads[thread].fd=OpenFileBufferedNew(filename);

       if(nthreads==1)
         {
          filesort_heapsort_thread(&threads[thread]);

          CloseFileBuffered(threads[thread].fd);
         }

#if defined(USE_PTHREADS) && USE_PTHREADS

       else
         {
          threads[thread].running=1;

          if(pthread_create(&threads[thread].thread,NULL,(void* (*)(void*))filesort_heapsort_thread,&threads[thread])==0)
             threads_running++;
          else
            {
             /* The thread could not be started; free the slot and do the work in this
                thread instead (otherwise the slot would never be marked as finished). */

             threads[thread].running=0;

             filesort_heapsort_thread(&threads[thread]);

             CloseFileBuffered(threads[thread].fd);
            }
         }

#endif

      }

    nfiles++;
   }
 while(more);

#if defined(USE_PTHREADS) && USE_PTHREADS

 /* Wait for all of the threads to finish */

 if(nthreads>1)
    wait_for_all_threads(threads,nthreads,&threads_running);

#endif

 /* Shortcut if there are no files */

 if(nfiles==0)
    goto tidy_and_exit;

 /* Shortcut if there is only one set of data and it was never written to disk (lucky for
    us we still have the data in RAM).  If a temporary file was written then a later read
    pass that kept nothing may have reset the pointers and overwritten the RAM buffer so
    the file is merged below. */

 if(nfiles==1 && !filename[0])
   {
    for(item=0;item<threads[0].n;item++)
      {
       if(!post_sort_function || post_sort_function(threads[0].datap[item],count_out))
         {
          FILESORT_VARINT itemsize=*(FILESORT_VARINT*)((char*)threads[0].datap[item]-FILESORT_VARSIZE);

          WriteFileBuffered(fd_out,(char*)threads[0].datap[item]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE);
          count_out++;
         }
      }

    goto tidy_and_exit;
   }

 /* Open all of the temporary files and perform an n-way merge using a binary heap
    (the merge function also deletes the temporary files) */

 largestitemsize=FILESORT_VARALIGN*((largestitemsize+FILESORT_VARALIGN-1)/FILESORT_VARALIGN);

 count_out=filesort_merge(fd_out,nfiles,0,largestitemsize,compare_function,post_sort_function);

 /* Tidy up */

 tidy_and_exit:

 for(i=0;i<nthreads;i++)
   {
    log_free(threads[i].data);

    free(threads[i].data);
   }

 free(threads);

 free(filename);

 return(count_out);
}


#if defined(USE_PTHREADS) && USE_PTHREADS

/*++++++++++++++++++++++++++++++++++++++
  Find a thread slot that is not in use, blocking until one of the running threads has
  finished if every slot is busy.  Finished threads are joined, their output files are
  closed and the count of running threads is reduced.

  int wait_for_free_thread Returns the number of the free thread slot.

  thread_data *threads The thread data.

  int nthreads The number of threads allowed.

  int *threads_running The number of threads running, updated when called.
  ++++++++++++++++++++++++++++++++++++++*/

static int wait_for_free_thread(thread_data *threads,int nthreads,int *threads_running)
{
 int slot;

 /* Everything below is done while holding the mutex */

 pthread_mutex_lock(&running_mutex);

 /* While every slot is busy, reap the finished threads or sleep until one signals */

 for(;;)
   {
    int t;

    if(*threads_running!=nthreads)
       break;

    for(t=0;t<nthreads;t++)
      {
       thread_data *finished=&threads[t];

       if(finished->running!=2)
          continue;

       pthread_join(finished->thread,NULL);
       finished->running=0;
       CloseFileBuffered(finished->fd);
       --*threads_running;
      }

    if(*threads_running!=nthreads)
       break;

    pthread_cond_wait(&running_cond,&running_mutex);
   }

 /* Pick the lowest numbered slot that is not in use */

 slot=0;

 while(slot<nthreads && threads[slot].running)
    slot++;

 pthread_mutex_unlock(&running_mutex);

 return(slot);
}


/*++++++++++++++++++++++++++++++++++++++
  Block until every running thread has finished; each finished thread is joined, its
  output file is closed and the count of running threads is reduced.

  thread_data *threads The thread data.

  int nthreads The number of threads allowed.

  int *threads_running The number of threads running, updated when called (zero on return).
  ++++++++++++++++++++++++++++++++++++++*/

static void wait_for_all_threads(thread_data *threads,int nthreads,int *threads_running)
{
 /* Everything below is done while holding the mutex */

 pthread_mutex_lock(&running_mutex);

 /* Reap the finished threads, sleeping until one signals if some are still working */

 for(;;)
   {
    int t;

    if(*threads_running==0)
       break;

    for(t=0;t<nthreads;t++)
      {
       thread_data *finished=&threads[t];

       if(finished->running!=2)
          continue;

       pthread_join(finished->thread,NULL);
       finished->running=0;
       CloseFileBuffered(finished->fd);
       --*threads_running;
      }

    if(*threads_running==0)
       break;

    pthread_cond_wait(&running_cond,&running_mutex);
   }

 pthread_mutex_unlock(&running_mutex);
}

#endif


/*++++++++++++++++++++++++++++++++++++++
  A wrapper function that can be run in a thread for fixed or variable data.

  The pointers are sorted and then, if an output file has been set, the items are written
  to it in sorted order.  When run as a thread (running flag set to 1) the flag is changed
  to 2 and the condition variable is signalled when the work has finished.

  void *filesort_heapsort_thread Returns NULL (required to return void*).

  thread_data *thread The data to be processed in this thread.
  ++++++++++++++++++++++++++++++++++++++*/

static void *filesort_heapsort_thread(thread_data *thread)
{
 size_t item;

 /* Sort the data pointers using a heap sort */

 filesort_heapsort(thread->datap,thread->n,thread->compare);

 /* Write the result to the temporary file if given (-1 means that there is no
    file; zero is a valid file descriptor so it must not be treated as 'no file') */

 if(thread->fd>=0)
   {
    if(thread->itemsize>0)
       for(item=0;item<thread->n;item++)
          WriteFileBuffered(thread->fd,thread->datap[item],thread->itemsize);
    else
       for(item=0;item<thread->n;item++)
         {
          /* Variable length items have their size stored immediately before the data */

          FILESORT_VARINT itemsize=*(FILESORT_VARINT*)((char*)thread->datap[item]-FILESORT_VARSIZE);

          WriteFileBuffered(thread->fd,(char*)thread->datap[item]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE);
         }
   }

#if defined(USE_PTHREADS) && USE_PTHREADS

 /* Signal that this thread has finished */

 if(thread->running==1)
   {
    pthread_mutex_lock(&running_mutex);

    thread->running=2;

    pthread_cond_signal(&running_cond);

    pthread_mutex_unlock(&running_mutex);
   }

#endif

 return(NULL);
}


/*++++++++++++++++++++++++++++++++++++++
  A function to sort an array of pointers efficiently.

  The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort,
  in particular, this is good because it can operate in-place and doesn't
  allocate more memory like using qsort() does.

  The heap is described using indexes that start at 1 (so that the parent of entry
  'index' is entry 'index/2'); entry 'index' of the heap is stored in datap[index-1].
  All indexes are of type size_t so that arrays of any size can be sorted.

  void **datap A pointer to the array of pointers to sort.

  size_t nitems The number of items of data to sort.

  int (*compare_function)(const void*, const void*) The comparison function.  This is identical
     to qsort if the data to be sorted is an array of things not pointers.
  ++++++++++++++++++++++++++++++++++++++*/

void filesort_heapsort(void **datap,size_t nitems,int(*compare_function)(const void*, const void*))
{
 size_t item;

 /* Fill the heap by pretending to insert the data that is already there */

 for(item=2;item<=nitems;item++)
   {
    size_t index=item;

    /* Bubble up the new value (largest at the top of the heap) */

    while(index>1)
      {
       size_t parent=index/2;
       void *temp;

       if(compare_function(datap[index-1],datap[parent-1])<=0)
          break;

       temp=datap[index-1];
       datap[index-1]=datap[parent-1];
       datap[parent-1]=temp;

       index=parent;
      }
   }

 /* Repeatedly pull out the root of the heap and swap with the bottom item */

 for(item=nitems;item>1;item--)
   {
    size_t index=1;
    void *temp;

    temp=datap[0];
    datap[0]=datap[item-1];
    datap[item-1]=temp;

    /* Bubble down the new value (largest at the top of the heap); the heap now
       holds entries 1 to item-1, this loop handles entries that have two children */

    while((2*index)<(item-1))
      {
       size_t child=2*index;

       /* Choose the larger of the two children */

       if(compare_function(datap[child-1],datap[child])<=0)
          child=child+1;

       if(compare_function(datap[index-1],datap[child-1])>=0)
          break;

       temp=datap[child-1];
       datap[child-1]=datap[index-1];
       datap[index-1]=temp;

       index=child;
      }

    /* Handle the case of an entry that has only one child (the last entry in the heap) */

    if((2*index)==(item-1))
      {
       size_t child=2*index;

       if(compare_function(datap[index-1],datap[child-1])<0)
         {
          temp=datap[child-1];
          datap[child-1]=datap[index-1];
          datap[index-1]=temp;
         }
      }
   }
}


/*++++++++++++++++++++++++++++++++++++++
  A function to merge an array of sorted files efficiently.

  The data is merged using an "external sort" http://en.wikipedia.org/wiki/External_sorting
  where only one item is read from each file at a time.

  The files that are merged are the temporary files "filesort.N.tmp" (for N from zero to
  nfiles-1) in the temporary directory; each one is deleted as soon as it has been re-opened.

  A binary heap of file numbers is used to select the file whose current item sorts first;
  the heap entries are numbered from 1 so that the parent of entry 'index' is 'index/2'.

  index_t filesort_merge Returns the number of items written to the output file

  int fd_out The file descriptor of the output file (opened for writing and empty).

  int nfiles The number of files to open and merge

  size_t itemsize The size of each item (non-zero if a fixed size sort).

  size_t largestitemsize The maximum size of each item (non-zero if a variable size sort).

  int (*compare_function)(const void*, const void*) The comparison function.  This is identical
     to qsort if the data to be sorted is an array of things not pointers.

  int (*post_sort_function)(void *,index_t) If non-NULL then this function is called for
     each item after they have been sorted.  The second parameter is the number of objects
     already written to the output file.  If the function returns 1 then the object is written
     to the output file, otherwise it is ignored.
  ++++++++++++++++++++++++++++++++++++++*/

static index_t filesort_merge(int fd_out,int nfiles,size_t itemsize,size_t largestitemsize,
                              int(*compare_function)(const void*, const void*),
                              int (*post_sort_function)(void*,index_t))
{
 int ndata=0;
 char *data,*filename;
 void **datap;
 int *fds,*heap;
 index_t count_out=0;
 int i;

 /* Allocate the memory */

 filename=(char*)malloc_logassert(strlen(option_tmpdirname)+24);

 /* One extra entry because heap[0] is not used (the heap is indexed from 1) */

 heap=(int*)malloc_logassert((1+nfiles)*sizeof(int));

 /* One item buffer per file; for variable size items each buffer has FILESORT_VARALIGN
    extra bytes at the start to hold the size of the item just before the data */

 if(itemsize)
    data=(void*)malloc_logassert(nfiles*itemsize);
 else
    data=(void*)malloc_logassert(nfiles*(largestitemsize+FILESORT_VARALIGN));

 datap=(void**)malloc_logassert(nfiles*sizeof(void*));

 fds=(int*)malloc_logassert(nfiles*sizeof(int));

 /* Re-open the files (and delete them straight away, the descriptors are still used for reading below) */

 for(i=0;i<nfiles;i++)
   {
    sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i);

    fds[i]=ReOpenFileBuffered(filename);

    DeleteFile(filename);
   }

 /* Fill the heap to start with */

 for(i=0;i<nfiles;i++)
   {
    int index;

    /* Read the first item from this file into its own buffer */

    if(itemsize)
      {
       datap[i]=data+i*itemsize;

       ReadFileBuffered(fds[i],datap[i],itemsize);
      }
    else
      {
       FILESORT_VARINT itemsize; /* the size of this item (hides the function parameter which is zero here) */

       datap[i]=data+FILESORT_VARALIGN+i*(largestitemsize+FILESORT_VARALIGN);

       ReadFileBuffered(fds[i],&itemsize,FILESORT_VARSIZE);

       /* Store the size immediately before the data */

       *(FILESORT_VARINT*)((char*)datap[i]-FILESORT_VARSIZE)=itemsize;

       ReadFileBuffered(fds[i],datap[i],itemsize);
      }

    /* Add the file number at the bottom of the heap */

    index=i+1;

    heap[index]=i;

    /* Bubble up the new value (swap with the parent while the new item sorts before it) */

    while(index>1)
      {
       int newindex;
       int temp;

       newindex=index/2;

       if(compare_function(datap[heap[index]],datap[heap[newindex]])>=0)
          break;

       temp=heap[index];
       heap[index]=heap[newindex];
       heap[newindex]=temp;

       index=newindex;
      }
   }

 /* Repeatedly pull out the root of the heap and refill from the same file */

 ndata=nfiles;

 do
   {
    int index=1;

    /* The item at the root of the heap is the next one in sorted order; write it out if it is wanted */

    if(!post_sort_function || post_sort_function(datap[heap[index]],count_out))
      {
       if(itemsize)
          WriteFileBuffered(fd_out,datap[heap[index]],itemsize);
       else
         {
          FILESORT_VARINT itemsize=*(FILESORT_VARINT*)((char*)datap[heap[index]]-FILESORT_VARSIZE);

          WriteFileBuffered(fd_out,(char*)datap[heap[index]]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE);
         }
       count_out++;
      }

    /* Read the next item from the same file into the same buffer; if the read fails
       (taken here to mean that the file is finished) then the last entry in the heap
       is moved to the root and the heap shrinks by one */

    if(itemsize)
      {
       if(ReadFileBuffered(fds[heap[index]],datap[heap[index]],itemsize))
         {
          heap[index]=heap[ndata];
          ndata--;
         }
      }
    else
      {
       FILESORT_VARINT itemsize;

       if(ReadFileBuffered(fds[heap[index]],&itemsize,FILESORT_VARSIZE))
         {
          heap[index]=heap[ndata];
          ndata--;
         }
       else
         {
          *(FILESORT_VARINT*)((char*)datap[heap[index]]-FILESORT_VARSIZE)=itemsize;

          ReadFileBuffered(fds[heap[index]],datap[heap[index]],itemsize);
         }
      }

    /* Bubble down the new value (this loop handles heap entries that have two children) */

    while((2*index)<ndata)
      {
       int newindex;
       int temp;

       newindex=2*index;

       /* Choose the child whose item sorts first */

       if(compare_function(datap[heap[newindex]],datap[heap[newindex+1]])>=0)
          newindex=newindex+1;

       if(compare_function(datap[heap[index]],datap[heap[newindex]])<=0)
          break;

       temp=heap[newindex];
       heap[newindex]=heap[index];
       heap[index]=temp;

       index=newindex;
      }

    /* Handle the case of a heap entry that has only one child (the last entry in the heap) */

    if((2*index)==ndata)
      {
       int newindex;
       int temp;

       newindex=2*index;

       if(compare_function(datap[heap[index]],datap[heap[newindex]])<=0)
          ; /* break */
       else
         {
          temp=heap[newindex];
          heap[newindex]=heap[index];
          heap[index]=temp;
         }
      }
   }
 while(ndata>0);

 /* Tidy up */

 free(filename);

 for(i=0;i<nfiles;i++)
    CloseFileBuffered(fds[i]);

 free(fds);

 free(heap);

 free(data);
 free(datap);

 return(count_out);
}
