Commit 52110b46 authored by Christoph Heim's avatar Christoph Heim
Browse files

Added GEOS. Working on ARPEGE-NH

parent b0eda3bf
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model NICAM.
author: Christoph Heim
date created: 27.06.2019
date changed: 05.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,15 +16,15 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer
from utilities import Timer, mergetime
###############################################################################
def sellatlon_NICAM(inp_file, out_file, dt, box, i_recompute):
def sellatlon_NICAM(inp_file, out_file, dt, box, options):
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
TM.stop('cdo')
......@@ -76,8 +76,9 @@ if __name__ == '__main__':
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
###########################################################################
......@@ -134,7 +135,7 @@ if __name__ == '__main__':
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, i_recompute) )
args.append( (inp_file, out_file, dt, box, options) )
# run function serial or parallel
if n_tasks > 1:
......@@ -149,5 +150,8 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model SAM.
author: Christoph Heim
date created: 20.06.2019
date changed: 05.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,15 +16,15 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer
from utilities import Timer, mergetime
###############################################################################
def sellatlon_SAM(inp_file, out_file, dt, box, i_recompute):
def sellatlon_SAM(inp_file, out_file, dt, box, options):
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('nco')
TM.stop('nco')
......@@ -66,6 +66,10 @@ def sellatlon_SAM(inp_file, out_file, dt, box, i_recompute):
print('\t\t{:%Y%m%d%H} completed'.format(dt))
TM.stop('cdo')
# delete tmp_file
if options['rm_tmp_files']:
os.remove(nco_file)
TM.stop('total')
return(TM)
......@@ -98,8 +102,10 @@ if __name__ == '__main__':
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
options['rm_tmp_files'] = 0
###########################################################################
......@@ -165,9 +171,9 @@ if __name__ == '__main__':
for i in range(len(use_times)):
inp_file = use_files[i]
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d%H}'.format(use_times[i])+'.nc')
var_name+'_{:%Y%m%d%H%M}'.format(use_times[i])+'.nc')
args.append( (inp_file, out_file, use_times[i], box,
i_recompute) )
options) )
# run function serial or parallel
if n_tasks > 1:
......@@ -182,5 +188,8 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model ICON.
author: Christoph Heim
date created: 27.06.2019
date changed: 07.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,33 +16,33 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, write_grid_file
from utilities import Timer, write_grid_file, mergetime
###############################################################################
def comp_weights_file(target_grid, weights_file, inp_file, grid_def_file,
res, box):
res, box, options):
"""
"""
if not os.path.exists(target_grid):
if (not os.path.exists(target_grid)) or (options['recompute']):
write_grid_file(box, target_grid, res)
print('Compute weights file')
ofile = cdo.gennn(target_grid, input=(' -setgrid,'+grid_def_file+
' '+inp_file), output=weights_file,
options='-P 48')
options='-P 1')
def sellatlon_ICON(inp_file, out_file, grid_def_file, weights_file,
target_grid, dt, box, i_recompute):
target_grid, dt, box, options):
"""
"""
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
TM.stop('cdo')
......@@ -96,8 +96,9 @@ if __name__ == '__main__':
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
###########################################################################
......@@ -183,12 +184,13 @@ if __name__ == '__main__':
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, grid_def_file,
weights_file, target_grid,
dt, box, i_recompute) )
dt, box, options) )
if not os.path.exists(weights_file):
if ((not os.path.exists(weights_file)) or
(not os.path.exists(target_grid))):
comp_weights_file(target_grid, weights_file,
inp_file, grid_def_file,
res, box)
res, box, options)
# run function serial or parallel
......@@ -204,5 +206,8 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model UM.
author: Christoph Heim
date created: 05.07.2019
date changed: 05.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,15 +16,15 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer
from utilities import Timer, mergetime
###############################################################################
def sellatlon_UM(inp_file, out_file, dt, box, i_recompute):
def sellatlon_UM(inp_file, out_file, dt, box, options):
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
TM.stop('cdo')
......@@ -75,8 +75,9 @@ if __name__ == '__main__':
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
###########################################################################
......@@ -130,7 +131,7 @@ if __name__ == '__main__':
'*{:%Y%m%d}*'.format(dt)))[0]
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, i_recompute) )
args.append( (inp_file, out_file, dt, box, options) )
# run function serial or parallel
if n_tasks > 1:
......@@ -145,5 +146,8 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model MPAS.
author: Christoph Heim
date created: 05.07.2019
date changed: 07.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,15 +16,15 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, write_grid_file
from utilities import Timer, write_grid_file, mergetime
###############################################################################
def comp_weights_file(target_grid, weights_file, inp_file, grid_def_file,
res, box):
res, box, options):
"""
"""
if not os.path.exists(target_grid):
if (not os.path.exists(target_grid)) or (options['recompute']):
write_grid_file(box, target_grid, res)
print('Compute weights file')
......@@ -32,16 +32,16 @@ def comp_weights_file(target_grid, weights_file, inp_file, grid_def_file,
input=(' -setgrid,mpas:'+grid_def_file+
' '+inp_file),
output=weights_file,
options='-P 48')
options='-P 1')
def sellatlon_MPAS(inp_file, out_file, dt, box, i_recompute, res, var_dict,
def sellatlon_MPAS(inp_file, out_file, dt, box, options, res, var_dict,
weights_file, target_grid):
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('prepr')
TM.stop('prepr')
......@@ -66,13 +66,13 @@ def sellatlon_MPAS(inp_file, out_file, dt, box, i_recompute, res, var_dict,
if not os.path.exists(weights_file):
comp_weights_file(target_grid, weights_file,
tmp_file, grid_def_file,
res, box)
res, box, options)
TM.stop('prepr')
# cdo
TM.start('cdo')
print('\t{:%Y%m%d%H}'.format(dt))
ofile = cdo.remap(target_grid, weights_file,
input=(
' -sellevidx,'+
......@@ -81,6 +81,10 @@ def sellatlon_MPAS(inp_file, out_file, dt, box, i_recompute, res, var_dict,
' '+tmp_file),
output=out_file, options='-f nc')
# delete tmp_file
if options['rm_tmp_files']:
os.remove(tmp_file)
TM.stop('cdo')
TM.stop('total')
......@@ -111,13 +115,16 @@ if __name__ == '__main__':
# model resolutions [km] of simulations
ress = [7.5, 3.75]
#ress = [3.75]
ress = [7.5]
# date range
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
options['rm_tmp_files'] = 1
###########################################################################
......@@ -199,17 +206,11 @@ if __name__ == '__main__':
inp_dir,var_dict[var_name]['type']+
'.{:%Y-%m-%d_%H.%M.%S}.nc'.format(dt)))[0]
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, i_recompute,
var_name+'_{:%Y%m%d%H%M}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, options,
res, var_dict[var_name], weights_file,
target_grid) )
#if not os.path.exists(weights_file):
# comp_weights_file(target_grid, weights_file,
# inp_file, grid_def_file,
# res, box)
# run function serial or parallel
if n_tasks > 1:
with Pool(processes=n_tasks) as pool:
......@@ -223,5 +224,8 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
......@@ -4,7 +4,7 @@
description: Extract lat-lon box of data from model IFS.
author: Christoph Heim
date created: 05.07.2019
date changed: 07.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
......@@ -16,15 +16,16 @@ from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer
from utilities import Timer, mergetime
###############################################################################
def sellatlon_IFS(inp_file, out_file, dt, box, i_recompute, var_dict):
def sellatlon_IFS(inp_file, out_file, dt, box, options, var_dict):
TM = Timer()
TM.start('total')
if os.path.exists(out_file) and not i_recompute:
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
TM.stop('cdo')
......@@ -56,6 +57,10 @@ def sellatlon_IFS(inp_file, out_file, dt, box, i_recompute, var_dict):
' '+tmp_file),
output=out_file, options='-f nc')
# delete tmp_file
if options['rm_tmp_files']:
os.remove(tmp_file)
TM.stop('cdo')
TM.stop('total')
......@@ -81,18 +86,20 @@ if __name__ == '__main__':
# variables to extract
var_names = ['QC', 'T']
var_names = ['T']
#var_names = ['T']
# model resolutions [km] of simulations
ress = [4,9]
#ress = [9]
ress = [9,4]
ress = [4]
# date range
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# recompute cdo
i_recompute = 0
# options for computation
options = {}
options['recompute'] = 0
options['rm_tmp_files'] = 1
###########################################################################
......@@ -159,9 +166,9 @@ if __name__ == '__main__':
for i in range(len(use_times)):
inp_file = use_files[i]
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d%H}'.format(use_times[i])+'.nc')
var_name+'_{:%Y%m%d%H%M}'.format(use_times[i])+'.nc')
args.append( (inp_file, out_file, use_times[i], box,
i_recompute, var_dict[var_name]) )
options, var_dict[var_name]) )
# run function serial or parallel
if n_tasks > 1:
......@@ -176,5 +183,9 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('real')
TM.print_report()
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description: Extract lat-lon box of data from model GEOS.
author: Christoph Heim
date created: 09.07.2019
date changed: 09.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
"""
###############################################################################
import os, glob, subprocess, sys, time
import numpy as np
from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, mergetime
###############################################################################
def sellatlon_GEOS(inp_file, out_file, dt, box, options, var_name, var_dict):
    """Cut a lat-lon / vertical-level box of one variable out of a GEOS file.

    If out_file already exists and options['recompute'] is falsy, the cdo
    call is skipped. Returns the Timer instance carrying the 'total' and
    'cdo' timings so the caller can merge them.
    """
    timer = Timer()
    timer.start('total')
    already_done = os.path.exists(out_file) and not options['recompute']
    if already_done:
        print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
        # start/stop immediately so the 'cdo' key exists for merge_timings
        timer.start('cdo')
        timer.stop('cdo')
    else:
        timer.start('cdo')
        print('\t{:%Y%m%d%H}'.format(dt))
        # chained cdo operators (applied right to left): select the variable,
        # restrict the vertical level index range, then cut the lat-lon box
        operator_chain = (' -sellevidx,' + str(box['vert0']) + '/' +
                          str(box['vert1']) +
                          ' -selname,' + var_dict[var_name]['key'] +
                          ' ' + inp_file)
        # NOTE(review): relies on the module-level cdo = Cdo() handle
        ofile = cdo.sellonlatbox(
            box['lon0'], box['lon1'],
            box['lat0'], box['lat1'],
            input=operator_chain,
            output=out_file)
        timer.stop('cdo')
    timer.stop('total')
    return(timer)
if __name__ == '__main__':
# GENERAL SETTINGS
###########################################################################
# input and output directories
raw_data_dir = os.path.join('/work','ka1081','DYAMOND')
out_base_dir = os.path.join('/work','ka1081','2019_06_Hackathon_Mainz',
'christoph_heim','newdata')
# lat lon vert box to subselect
box = {'lon0': 265, 'lon1': 281, 'lat0': -24, 'lat1': -14,
'vert0':1,'vert1':13}
# name of model
model_name = 'GEOS'
# variables to extract
var_names = ['QC', 'T', 'H']
#var_names = ['QC']
# model resolutions [km] of simulations
ress = [3]
# date range
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# options for computation
options = {}
options['recompute'] = 0
###########################################################################
# GEOS SPECIFIC SETTINGS
###########################################################################
var_dict = {
'QC':{'file':'geosgcm_prog','key':'QL'},
'T' :{'file':'geosgcm_prog','key':'T'},
'H' :{'file':'geosgcm_prog','key':'H'},
}
inc_min = {'geosgcm_prog':360}
run_specif_name = '-MOM_NoDeepCu'
###########################################################################
## PREPARING STEPS
TM = Timer()
TM.start('real')
cdo = Cdo()
if len(sys.argv) > 1:
n_tasks = int(sys.argv[1])
else:
n_tasks = 1
print('Using ' + str(n_tasks) + ' taks.')
## EXTRACT VARIABLES FROM SIMULATIONS
for var_name in var_names:
#var_name = 'T'
print('############## var ' + var_name + ' ##################')
dt_range = np.arange(first_date, last_date + timedelta(days=1),
timedelta(minutes=
inc_min[var_dict[var_name]['file']])).tolist()
for res in ress:
print('############## res ' + str(res) + ' ##################')
#res = 4
sim_name = model_name + '-' + str(res) + 'km'+run_specif_name
inp_dir = os.path.join(raw_data_dir, sim_name)
# directory for final model output (after mergetime)
out_dir = os.path.join(out_base_dir, model_name + '_' + str(res))
Path(out_dir).mkdir(parents=True, exist_ok=True)
# directory for output of files in time merge level of raw model
# output
out_tmp_dir = os.path.join(out_base_dir, model_name +
'_' + str(res),'tmp')
Path(out_tmp_dir).mkdir(parents=True, exist_ok=True)
# find times and files that should be extracted
# and prepare arguments for function
args = []
for dt in dt_range:
inp_file = glob.glob(os.path.join(inp_dir,var_dict[var_name]['file'],
'*{:%Y%m%d_%H%M}z.nc4'.format(dt)))[0]
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d%H%M}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, options, var_name,
var_dict) )
# run function serial or parallel
if n_tasks > 1:
with Pool(processes=n_tasks) as pool:
results = pool.starmap(sellatlon_GEOS, args)
else:
results = []
for arg in args:
results.append(sellatlon_GEOS(*arg))
# collect timings from subtasks
for task_TM in results:
TM.merge_timings(task_TM)