program main
   use, intrinsic :: iso_fortran_env, only: REAL32
   use mpi
   use pFIO
   use ctest_io_CLI
   use MAPL_ExceptionHandling
   use FakeHistData0Mod
   use pFlogger, only: pflogger_init => initialize
   implicit none
   integer :: rank, npes, ierror, provided,required
   integer :: status, color, key
   class(BaseServer), target, allocatable :: iserver,oserver
   class(AbstractDirectoryService), allocatable, target :: directory_service
   type (CommandLineOptions0) :: options
   integer, parameter :: NO_COLOR     = 0
   integer, parameter :: iSERVER_COLOR = 1
   integer, parameter :: oSERVER_COLOR = 4
   integer, parameter :: CLIENT_COLOR  = 2
   integer, parameter :: BOTH_COLOR    = 3
   type (FakeHistData0), target :: HistData
   integer :: my_comm_world, my_iComm, my_oComm, my_appcomm
   integer :: client_start, low_rank,up_rank
   integer :: i,k, size_iclient, size_oclient
   integer :: app_start_rank, app_end_rank
   character(len = 20) :: out_file
   character(len = 100):: cmd
   integer :: N_iclient_group, N_oclient_group,N_groups
   integer,allocatable :: local_comm_world(:), app_comms(:)
   integer :: md_id, exit_code
   integer :: c3   ! system_clock count, used only as a timing marker
   
   required = MPI_THREAD_MULTIPLE
   !call MPI_init_thread(required, provided, ierror)
   call MPI_init(ierror)
   call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierror)
   call MPI_Comm_size(MPI_COMM_WORLD, npes, ierror)
   call process_command_line(options, rc=status)
   ! split MPI_COMM_WORLD into local worlds, one per client group
   N_iclient_group = options%N_ig
   N_oclient_group = options%N_og
   N_groups = N_iclient_group + N_oclient_group
 
   allocate(local_comm_world(N_groups))
   allocate(app_comms(N_groups))
   key = 0
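   ! decide how ranks are partitioned: for the 'mpi'-family layouts the
   ! application ranks come first and the i/o-server groups occupy the tail
   ! of MPI_COMM_WORLD; for 'simple' and 'hybrid' the i-server ranks overlap
   ! the application ranks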
   if (options%server_type == 'simple') then
      options%npes_iserver = npes
      options%npes_oserver = npes
      N_iclient_group = 1
      N_oclient_group = 1
      size_iclient = 0
      client_start = 0
      app_start_rank = 0
      app_end_rank = npes-1
   else if (options%server_type == 'mpi' .or. &
            options%server_type == 'multilayer' .or. &
            options%server_type == 'multicomm'  .or. &
            options%server_type == 'multigroup' ) then 
      size_iclient = N_iclient_group*options%npes_iserver
      size_oclient = N_oclient_group*options%npes_oserver
      client_start = npes - size_iclient-size_oclient
      app_start_rank = 0
      app_end_rank   = npes - size_iclient-size_oclient -1
   else if (options%server_type == 'hybrid') then
      options%npes_iserver = npes - options%npes_oserver
      N_iclient_group = 1
      N_oclient_group = 1
      size_iclient =  N_iclient_group*options%npes_iserver
      client_start = 0
      app_start_rank = 0
      app_end_rank   = size_iclient - 1 
   endif
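   ! the directory service is the rendezvous point: servers publish their
   ! ports here and clients look them up to connect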
   directory_service = DirectoryService(MPI_COMM_WORLD)
   ! app + iclient comm
   my_icomm = MPI_COMM_NULL
   my_appcomm = MPI_COMM_NULL
   call pflogger_init()
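   ! two-stage split per i-server group: first carve a local world holding the
   ! app ranks plus this group's server ranks out of MPI_COMM_WORLD, then split
   ! that local world again so the server ranks get my_icomm while the app
   ! ranks keep app_comms(i) for talking to this group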
   do i = 1, N_iclient_group
      low_rank = client_start + (i-1) * options%npes_iserver
      up_rank  = client_start + i*options%npes_iserver
      color = MPI_UNDEFINED
      if (( app_start_rank<= rank .and. rank <= app_end_rank ) .or. ( low_rank <= rank .and. rank < up_rank) ) then
        color = 1
      endif
      call MPI_comm_split(MPI_COMM_WORLD,color,key,local_comm_world(i), ierror)
      if (low_rank <= rank .and. rank < up_rank)  then
         my_comm_world = local_comm_world(i)
      endif
      color = MPI_UNDEFINED
      if ( app_start_rank<= rank .and. rank <= app_end_rank ) then
         color = 1
      endif
      if (low_rank <= rank .and. rank < up_rank)  then
         color = 2
      endif
      app_comms(i) = MPI_COMM_NULL
      if (local_comm_world(i) /= MPI_COMM_NULL) then 
         call MPI_comm_split(local_comm_world(i),color,key,app_comms(i), ierror)
      endif
      
      if ( app_start_rank<= rank .and. rank <= app_end_rank ) then
         my_appcomm = app_comms(i)
      endif
      if (low_rank <= rank .and. rank < up_rank) then
         my_icomm = app_comms(i)
      endif
   enddo
 
   ! app + oclient comm
   my_ocomm = MPI_COMM_NULL
   do k = 1, N_oclient_group
      i = k + N_iclient_group
      low_rank = client_start+size_iclient + (k-1) * options%npes_oserver
      up_rank  = client_start+size_iclient + k*options%npes_oserver
      color = MPI_UNDEFINED
      if (( app_start_rank<= rank .and. rank <= app_end_rank ) .or. ( low_rank <= rank .and. rank < up_rank) ) then
        color = i
      endif
      call MPI_comm_split(MPI_COMM_WORLD,color,key,local_comm_world(i), ierror)
      if ( low_rank <= rank .and. rank < up_rank)  then
         my_comm_world = local_comm_world(i)
      endif
      color = MPI_UNDEFINED
      if ( app_start_rank<= rank .and. rank <= app_end_rank ) then
        color = i
      endif
      if (low_rank <= rank .and. rank < up_rank) then
        color = i+1
      endif
      app_comms(i) = MPI_COMM_NULL
      if (local_comm_world(i) /= MPI_COMM_NULL) then 
         call MPI_comm_split(local_comm_world(i),color,key,app_comms(i), ierror)
      endif
      
      if (low_rank <= rank .and. rank < up_rank) then
        my_ocomm = app_comms(i)
      endif
   enddo
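   ! ranks that ended up with an i-server communicator build the input server
   ! and publish it; ranks that are servers only (no app communicator) connect
   ! to their clients and block inside start()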
   if (my_icomm /= MPI_COMM_NULL) then  
      allocate(iserver, source = MpiServer(my_icomm, 'iserver'))
      call directory_service%publish(PortInfo('iserver',iserver), iserver, rc=status)
      if (my_appcomm == MPI_COMM_NULL) then ! dedicated server ranks: connect to clients and enter the serve loop
         call directory_service%connect_to_client('iserver', iserver, rc=status)
         call iserver%start()
      endif
   endif
   if (my_ocomm /= MPI_COMM_NULL) then
     
      if (trim(options%server_type) == 'mpi' .or. &
          trim(options%server_type) == 'simple' .or. &
          trim(options%server_type) == 'hybrid' ) then 
        allocate(oserver, source = MpiServer(my_ocomm, 'oserver'))
      else if (trim(options%server_type) == 'multilayer') then
         allocate(oserver, source = MultiLayerServer(my_ocomm, 'oserver', &
               options%n_writer, options%writer))
      else if (trim(options%server_type) == 'multicomm') then
         allocate(oserver, source = MultiCommServer(my_ocomm, 'oserver', &
               options%n_writer))
      else if (trim(options%server_type) == 'multigroup') then
         allocate(oserver, source = MultiGroupServer(my_ocomm, 'oserver', &
               options%n_writer))
      endif
      call directory_service%publish(PortInfo('oserver',oserver), oserver, rc=status)
      if (my_appcomm == MPI_COMM_NULL) then 
         call directory_service%connect_to_client('oserver', oserver, rc=status)
         call oserver%start()
      endif
   endif
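   ! application ranks drive the fake history component: initialize against the
   ! published servers, write two steps, then finalize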
   if ( my_appcomm /= MPI_COMM_NULL) then
      
      print*,"start app rank:", rank
      call histData%init(options,app_comms,directory_service, N_iclient_group, N_oclient_group)
      call histData%run(step=1)
      call histData%run(step=2)
      call histData%finalize()
   end if
  
   call Mpi_Barrier(MPI_COMM_WORLD,ierror)
   call system_clock(c3)
   call directory_service%free_directory_resources()
   call Mpi_Barrier(MPI_COMM_WORLD,ierror)
   exit_code = 0
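   ! rank 0 verifies the run: each output file must be identical to test_in.nc4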
   if (rank == 0) then
      do md_id = 1, 2
         cmd=''
         out_file = 'test_out'//i_to_string(md_id)//'.nc4'
         cmd = '/usr/bin/diff test_in.nc4 '//trim(out_file)
         print*, "execute_command_line: ", cmd
         call execute_command_line(trim(cmd), exitstat = status )
         if (status == 0) then
            print*, 'test_in.nc4 and '//trim(out_file)//' are the same and thus removed'
            call execute_command_line('/bin/rm -f '//trim(out_file))
         else
            print*, 'test_in.nc4 and '//trim(out_file)//' differ'
            exit_code = 1
         endif
      enddo
      call execute_command_line('/bin/rm -f test_in.nc4')
   endif
   call MPI_finalize(ierror)
   if ( exit_code == 0) stop 0
   if ( exit_code == 1) stop 1
end program main