Commit e5f5e549 authored by Christoph Heim's avatar Christoph Heim
Browse files

Improved a lot of things and extracted LWUTOA and TQC.

parent d1783115
......@@ -4,53 +4,52 @@
description: Extract lat-lon box of data from model NICAM.
author: Christoph Heim
date created: 27.06.2019
date changed: 10.07.2019
date changed: 15.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
"""
###############################################################################
import os, glob, subprocess, sys, time
import os, glob, subprocess, sys, time, shutil
import numpy as np
from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, mergetime
from package.utilities import Timer, cdo_mergetime
from namelist import domain
from functions import paste_dir_names
###############################################################################
def sellatlon_NICAM(inp_file, out_file, dt, box, options, var_dict):
def sellatlon_NICAM(inp_file, out_file, dt, box, options, var_dict,
var_name, res):
TM = Timer()
TM.start('total')
file_code = '{}km_{}_{:%Y%m%d}'.format(res, var_name, dt)
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
print('\t\t'+file_code+' already computed')
TM.stop('cdo')
else:
# cdo
TM.start('cdo')
#print('\t{:%Y%m%d%H} cdo'.format(dt))
print('\t\t'+file_code)
if var_dict['dim'] == '3D':
ofile = cdo.sellonlatbox(
box['lon0'],box['lon1'],
box['lat0'],box['lat1'],
input=('-sellevidx,'+str(box['vert0'])+'/'+
str(box['vert1'])+' '+inp_file),
output=out_file)
input = ('-sellevidx,'+str(box['vert0'])+'/'+
str(box['vert1'])+' '+inp_file)
elif var_dict['dim'] == '2D':
ofile = cdo.sellonlatbox(
box['lon0'],box['lon1'],
box['lat0'],box['lat1'],
input=(inp_file),
output=out_file)
input=inp_file
print('\t\t{:%Y%m%d%H} completed'.format(dt))
TM.stop('cdo')
ofile = cdo.sellonlatbox(
box['lon'].start,box['lon'].stop,
box['lat'].start,box['lat'].stop,
input=input,
output=out_file)
TM.stop('total')
TM.stop('cdo')
return(TM)
......@@ -65,8 +64,8 @@ if __name__ == '__main__':
'christoph_heim','newdata')
# lat lon vert box to subselect
box = {'lon0': 265, 'lon1': 281, 'lat0': -24, 'lat1': -14,
'vert0':1,'vert1':18}
box = domain
box.update({'vert0':1,'vert1':18})
# name of model
model_name = 'NICAM'
......@@ -74,11 +73,13 @@ if __name__ == '__main__':
# variables to extract
var_names = ['QV', 'QC', 'T', 'W',
'U10M', 'V10M', 'T2M', 'LWUTOA', 'SWDSFC',
'SLHFLX', 'SSHFLX']
'SLHFLX', 'SSHFLX', 'TQC']
var_names = ['LWUTOA', 'TQC']
# model resolutions [km] of simulations
ress = [7, 3.5]
#ress = [7]
#ress = [3.5]
# date range
first_date = datetime(2016,8,10)
......@@ -86,7 +87,8 @@ if __name__ == '__main__':
# options for computation
options = {}
options['recompute'] = 0
options['recompute'] = 0
options['rm_tmp_folder'] = 0
###########################################################################
......@@ -105,12 +107,12 @@ if __name__ == '__main__':
'SWDSFC':{'file':'ss_swd_sfc', 'dim':'2D', },
'SLHFLX':{'file':'ss_lh_sfc', 'dim':'2D', },
'SSHFLX':{'file':'ss_sh_sfc', 'dim':'2D', },
'TQC' :{'file':'sa_cldw', 'dim':'2D', },
}
###########################################################################
## PREPARING STEPS
TM = Timer()
TM.start('real')
dt_range = np.arange(first_date, last_date + timedelta(days=1),
timedelta(days=1)).tolist()
......@@ -124,27 +126,30 @@ if __name__ == '__main__':
print('Using ' + str(n_tasks) + ' taks.')
## EXTRACT VARIABLES FROM SIMULATIONS
# args used as input to parallel executed function
args = []
for var_name in var_names:
#var_name = 'T'
print('############## var ' + var_name + ' ##################')
for res in ress:
print('############## res ' + str(res) + ' ##################')
#res = 4
# data input directory
sim_name = model_name + '-' + str(res) + 'km'
inp_dir = os.path.join(raw_data_dir, sim_name)
# directory for final model output (after mergetime)
out_dir = os.path.join(out_base_dir, model_name + '_' + str(res))
# out_dir = directory for final model output (after mergetime)
# out_tmp_dir = directory for output of files in time merge
# level of raw model output
out_dir, out_tmp_dir = paste_dir_names(out_base_dir,
model_name, res, domain)
Path(out_dir).mkdir(parents=True, exist_ok=True)
# directory for output of files in time merge level of raw model
# output
out_tmp_dir = os.path.join(out_base_dir, model_name +
'_' + str(res),'tmp')
# remove temporary files if desired
if options['rm_tmp_folder'] and os.path.exists(out_tmp_dir):
shutil.rmtree(out_tmp_dir)
Path(out_tmp_dir).mkdir(parents=True, exist_ok=True)
# find times and files that should be extracted
# and prepare arguments for function
args = []
for dt in dt_range:
inp_files_glob = glob.glob(os.path.join(inp_dir,
'{:%Y%m%d}*'.format(dt)))
......@@ -154,23 +159,28 @@ if __name__ == '__main__':
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, dt, box, options,
var_dict[var_name]) )
var_dict[var_name], var_name, res) )
# run function serial or parallel
if n_tasks > 1:
with Pool(processes=n_tasks) as pool:
results = pool.starmap(sellatlon_NICAM, args)
else:
results = []
for arg in args:
results.append(sellatlon_NICAM(*arg))
# run function serial or parallel
if n_tasks > 1:
with Pool(processes=n_tasks) as pool:
results = pool.starmap(sellatlon_NICAM, args)
else:
results = []
for arg in args:
results.append(sellatlon_NICAM(*arg))
# collect timings from subtasks
for task_TM in results:
TM.merge_timings(task_TM)
# collect timings from subtasks
for task_TM in results:
TM.merge_timings(task_TM)
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
# merge all time step files to one
TM.start('merge')
for var_name in var_names:
for res in ress:
out_dir, out_tmp_dir = paste_dir_names(out_base_dir,
model_name, res, domain)
cdo_mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('merge')
TM.stop('real')
TM.print_report()
......@@ -4,35 +4,37 @@
description: Extract lat-lon box of data from model SAM.
author: Christoph Heim
date created: 20.06.2019
date changed: 10.07.2019
date changed: 15.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
"""
###############################################################################
import os, glob, subprocess, sys, time
import os, glob, subprocess, sys, time, shutil
import numpy as np
from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, mergetime
from package.utilities import Timer, cdo_mergetime
from namelist import domain
from functions import paste_dir_names
###############################################################################
def sellatlon_SAM(inp_file, out_file, dt, box, options):
def sellatlon_SAM(inp_file, out_file, dt, box, options, var_name, res):
TM = Timer()
TM.start('total')
file_code = '{}km_{}_{:%Y%m%d%H%M}'.format(res, var_name, dt)
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('nco')
TM.stop('nco')
TM.start('cdo')
print('\t\t'+file_code+' already computed')
TM.stop('nco')
TM.stop('cdo')
else:
print('\t\t{:%Y%m%d%H}'.format(dt))
print('\t\t'+file_code)
split = os.path.split(out_file)
nco_file = os.path.join(split[0],'nco_'+split[1])
......@@ -40,7 +42,6 @@ def sellatlon_SAM(inp_file, out_file, dt, box, options):
# nco
if not os.path.exists(nco_file):
TM.start('nco')
#print('\t{:%Y%m%d%H} nco'.format(dt))
bash_command = ('ncatted -O -a units,lon,o,c,degrees_east ' +
'-a units,lat,o,c,degrees_north '+inp_file+
' '+nco_file)
......@@ -54,10 +55,9 @@ def sellatlon_SAM(inp_file, out_file, dt, box, options):
# cdo
TM.start('cdo')
#print('\t{:%Y%m%d%H} cdo'.format(dt))
ofile = cdo.sellonlatbox(
box['lon0'],box['lon1'],
box['lat0'],box['lat1'],
box['lon'].start,box['lon'].stop,
box['lat'].start,box['lat'].stop,
input=('-sellevidx,'+str(box['vert0'])+'/'+
str(box['vert1'])+' -settaxis,'+
'{:%Y-%m-%d,%H:%M:%S,3hour}'.format(dt)+
......@@ -70,8 +70,6 @@ def sellatlon_SAM(inp_file, out_file, dt, box, options):
if options['rm_tmp_files']:
os.remove(nco_file)
TM.stop('total')
return(TM)
......@@ -85,18 +83,17 @@ if __name__ == '__main__':
'christoph_heim','newdata')
# lat lon vert box to subselect
box = {'lon0': 265, 'lon1': 281, 'lat0': -24, 'lat1': -14,
'vert0':1,'vert1':28}
box = domain
box.update({'vert0':1,'vert1':28})
# name of model
model_name = 'SAM'
# variables to extract
var_names = ['QC', 'T']
var_names = ['QV', 'QC', 'T', 'W',
'U10M', 'V10M', 'T2M', 'LWUTOA', 'SWDSFC',
'SLHFLX', 'SSHFLX']
#var_names = ['T2M']
'SLHFLX', 'SSHFLX', 'TQC']
var_names = ['LWUTOA', 'TQC']
# model resolutions [km] of simulations
ress = [4]
......@@ -109,7 +106,7 @@ if __name__ == '__main__':
options = {}
options['recompute'] = 0
options['rm_tmp_files'] = 1
#options['redo_mergetime'] = 1
options['rm_tmp_folder'] = 0
###########################################################################
......@@ -139,6 +136,8 @@ if __name__ == '__main__':
'loc':'OUT_2D','fntime':(-20,-10),},
'SSHFLX':{'file':'SHF',
'loc':'OUT_2D','fntime':(-20,-10),},
'TQC' :{'file':'CWP',
'loc':'OUT_2D','fntime':(-20,-10),},
}
......@@ -148,7 +147,6 @@ if __name__ == '__main__':
## PREPARING STEPS
TM = Timer()
TM.start('real')
cdo = Cdo()
if len(sys.argv) > 1:
......@@ -159,22 +157,22 @@ if __name__ == '__main__':
## EXTRACT VARIABLES FROM SIMULATIONS
for var_name in var_names:
#var_name = 'T'
print('############## var ' + var_name + ' ##################')
for res in ress:
print('############## res ' + str(res) + ' ##################')
#res = 4
sim_name = model_name + '-' + str(res) + 'km'
inp_dir = os.path.join(raw_data_dir, sim_name,
var_dict[var_name]['loc'])
# directory for final model output (after mergetime)
out_dir = os.path.join(out_base_dir, model_name + '_' + str(res))
# out_dir = directory for final model output (after mergetime)
# out_tmp_dir = directory for output of files in time merge
# level of raw model output
out_dir, out_tmp_dir = paste_dir_names(out_base_dir,
model_name, res, domain)
Path(out_dir).mkdir(parents=True, exist_ok=True)
# directory for output of files in time merge level of raw model
# output
out_tmp_dir = os.path.join(out_base_dir, model_name +
'_' + str(res),'tmp')
# remove temporary files if desired
if options['rm_tmp_folder'] and os.path.exists(out_tmp_dir):
shutil.rmtree(out_tmp_dir)
Path(out_tmp_dir).mkdir(parents=True, exist_ok=True)
# find times and files that should be extracted
......@@ -203,7 +201,7 @@ if __name__ == '__main__':
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d%H%M}'.format(use_times[i])+'.nc')
args.append( (inp_file, out_file, use_times[i], box,
options) )
options, var_name, res) )
# run function serial or parallel
if n_tasks > 1:
......@@ -218,8 +216,9 @@ if __name__ == '__main__':
for task_TM in results:
TM.merge_timings(task_TM)
TM.start('merge')
# merge all time step files to one
mergetime(out_tmp_dir, out_dir, var_name)
cdo_mergetime(out_tmp_dir, out_dir, var_name)
TM.stop('merge')
TM.stop('real')
TM.print_report()
......@@ -4,19 +4,21 @@
description: Extract lat-lon box of data from model ICON.
author: Christoph Heim
date created: 27.06.2019
date changed: 10.07.2019
date changed: 15.07.2019
usage: arguments:
1st: n jobs for multiprocessing pool
python: 3.5.2
"""
###############################################################################
import os, glob, subprocess, sys, time
import os, glob, subprocess, sys, time, shutil
import numpy as np
from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
from utilities import Timer, write_grid_file, mergetime
from package.utilities import Timer, write_grid_file, cdo_mergetime
from namelist import domain
from functions import paste_dir_names
###############################################################################
......@@ -35,20 +37,22 @@ def comp_weights_file(target_grid, weights_file, inp_file, grid_def_file,
def sellatlon_ICON(inp_file, out_file, grid_def_file, weights_file,
target_grid, dt, box, options, var_dict):
target_grid, dt, box, options, var_dict, var_name,
res):
"""
"""
TM = Timer()
TM.start('total')
file_code = '{}km_{}_{:%Y%m%d}'.format(res, var_name, dt)
if os.path.exists(out_file) and not options['recompute']:
print('\t\t{:%Y%m%d%H} already computed -> skip'.format(dt))
TM.start('cdo')
print('\t\t'+file_code+' already computed')
TM.stop('cdo')
else:
# cdo
TM.start('cdo')
print('\t\t'+file_code)
if var_dict['dim'] == '3d':
ofile = cdo.remap(target_grid, weights_file,
......@@ -57,20 +61,17 @@ def sellatlon_ICON(inp_file, out_file, grid_def_file, weights_file,
str(box['vert0'])+'/'+str(box['vert1'])+
' -setgrid,'+grid_def_file+
' '+inp_file),
output=out_file, options='-P 1 -f nc')
output=out_file, options='-f nc')
elif var_dict['dim'] == '2d':
ofile = cdo.remap(target_grid, weights_file,
input=(
' -setgrid,'+grid_def_file+
#' -selname,'+var_dict['key']+
' -selname,'+var_dict['key']+
' '+inp_file),
output=out_file, options='-P 1 -f nc')
output=out_file, options='-f nc')
print('\t\t{:%Y%m%d%H} completed'.format(dt))
TM.stop('cdo')
TM.stop('total')
return(TM)
......@@ -84,79 +85,71 @@ if __name__ == '__main__':
'christoph_heim','newdata')
# vert box to subselect
box = {'lon0': 265, 'lon1': 281, 'lat0': -24, 'lat1': -14,
'vert0':73-14,'vert1':91-14}
box = domain
box.update({'vert0':73-14,'vert1':91-14})
# name of model
model_name = 'ICON'
# variables to extract
var_names = ['QC', 'T']
var_names = ['QV', 'QC', 'T', 'W',
'U10M', 'V10M', 'T2M', 'LWUTOA', 'SWTSFC', 'SWDIFFUSFC',
'SLHFLX', 'SSHFLX']
var_names = ['SLHFLX']
#var_names = ['T']
'U10M', 'V10M', 'T2M', 'LWUTOA', 'SWNSFC', 'SWDIFFUSFC',
'SLHFLX', 'SSHFLX', 'TQC']
var_names = ['LWUTOA', 'TQC']
# model resolutions [km] of simulations
ress = [10,5,2.5]
ress = [10,2.5]
#ress = [5]
ress = [10]
#ress = [10]
#ress = [2.5]
# date range
first_date = datetime(2016,8,10)
last_date = datetime(2016,8,10)
last_date = datetime(2016,8,19)
# options for computation
options = {}
options['recompute'] = 0
options['recompute'] = 0
options['rm_tmp_folder'] = 0
###########################################################################
# ICON SPECIFIC SETTINGS
###########################################################################
grid_def_base_dir = os.path.join('/work','bk1040','experiments', 'input')
var_dict = {
'QV' :{'file':'qv', 'dim':'3d', },
'QC' :{'file':'tot_qc_dia', 'dim':'3d', },
'T' :{'file':'t', 'dim':'3d', },
'W' :{'file':'w', 'dim':'3d', },
'U10M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'10u'},
'V10M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'10v'},
'T2M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'2t'},
'LWUTOA' :{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'nlwrf_2'},
'SWTSFC' :{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'nswrf'},
'SWDIFFUSFC':{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'uswrf_2'},
'SLHFLX' :{'file':'atm2_2d_ml', 'dim':'2d', 'key':'SHFL_S'},
'SSHFLX' :{'file':'atm2_2d_ml', 'dim':'2d', 'key':'LHFL_S'},
'U10M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'U_10M'},
'V10M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'V_10M'},
'T2M' :{'file':'atm3_2d_ml', 'dim':'2d', 'key':'T_2M'},
'LWUTOA' :{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'ATHB_T'},
'SWNSFC' :{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'ASOB_S'},
'SWDIFFUSFC':{'file':'atm_2d_avg_ml', 'dim':'2d', 'key':'ASWDIFU_S'},
'SLHFLX' :{'file':'atm2_2d_ml', 'dim':'2d', 'key':'LHFL_S'},
'SSHFLX' :{'file':'atm2_2d_ml', 'dim':'2d', 'key':'SHFL_S'},
'TQC' :{'file':'atm1_2d_ml', 'dim':'2d', 'key':'TQC_DIA'},
}
grid_dict = {
10: {'grid_def_file':os.path.join(grid_def_base_dir,
'10km/icon_grid_0025_R02B08_G.nc'),
'weights_file':'weights_10km',
'target_grid':'grids/latlon_dx_10.00km',
},
5: {'grid_def_file':os.path.join(grid_def_base_dir,
'5km_2/icon_grid_0015_R02B09_G.nc'),
'weights_file':'weights_5km',
'target_grid':'grids/latlon_dx_05.00km',
},
2.5:{'grid_def_file':os.path.join(grid_def_base_dir,
'2.5km/icon_grid_0017_R02B10_G.nc'),
'weights_file':'weights_2.5km',
'target_grid':'grids/latlon_dx_02.50km',
},
}
###########################################################################
## PREPARING STEPS
TM = Timer()
TM.start('real')
dt_range = np.arange(first_date, last_date + timedelta(days=1),
timedelta(days=1)).tolist()
......@@ -170,22 +163,25 @@ if __name__ == '__main__':
print('Using ' + str(n_tasks) + ' taks.')
## EXTRACT VARIABLES FROM SIMULATIONS
for var_name in var_names:
#var_name = 'T'
print('############## var ' + var_name + ' ##################')
for res in ress:
print('############## res ' + str(res) + ' ##################')
#res = 4
# args used as input to parallel executed function
args = []
for res in ress:
print('############## res ' + str(res) + ' ##################')
for var_name in var_names:
print('\t############## var ' + var_name + ' ##################')
sim_name = model_name + '-' + str(res) + 'km'
inp_dir = os.path.join(raw_data_dir, sim_name)
# directory for final model output (after mergetime)
out_dir = os.path.join(out_base_dir, model_name + '_' + str(res))
# out_dir = directory for final model output (after mergetime)
# out_tmp_dir = directory for output of files in time merge
# level of raw model output
out_dir, out_tmp_dir = paste_dir_names(out_base_dir,
model_name, res, domain)
Path(out_dir).mkdir(parents=True, exist_ok=True)
# directory for output of files in time merge level of raw model
# output
out_tmp_dir = os.path.join(out_base_dir, model_name +
'_' + str(res),'tmp')
# remove temporary files if desired
if options['rm_tmp_folder'] and os.path.exists(out_tmp_dir):
shutil.rmtree(out_tmp_dir)
Path(out_tmp_dir).mkdir(parents=True, exist_ok=True)
# prepare grid interpolation
......@@ -193,49 +189,55 @@ if __name__ == '__main__':
grid_def_file = grid_dict[res]['grid_def_file']
# weights file to compute once for a grid
weights_file = os.path.join(out_tmp_dir,
grid_dict[res]['weights_file'])
'weights_{}km_dom_{}'.format(res,
domain['code']))
# target grid on which to interpolate the model output
target_grid = grid_dict[res]['target_grid']
target_grid = os.path.join('grids','latlon_{}km_dom_{}'.format(
res, domain['code']))
# find times and files that should be extracted
# and prepare arguments for function
args = []
for dt in dt_range:
inp_files_glob = glob.glob(os.path.join(inp_dir,
'*_{}_*{:%Y%m%d}*'.format(
var_dict[var_name]['file'], dt)))
inp_file = os.path.join(inp_files_glob[0])
#print(inp_file)
#quit()
out_file = os.path.join(out_tmp_dir,
var_name+'_{:%Y%m%d}'.format(dt)+'.nc')
args.append( (inp_file, out_file, grid_def_file,