07_geos.py 10.9 KB
Newer Older
1
2
3
4
5
6
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
description:    Extract lat-lon box of data from model GEOS.
author:         Christoph Heim
date created:   09.07.2019
7
date changed:   27.02.2020
8
9
10
11
usage:          arguments:
                1st:    n jobs for multiprocessing pool
"""
###############################################################################
12
import os, glob, subprocess, sys, time, shutil
13
14
15
16
17
import numpy as np
from datetime import datetime, timedelta
from multiprocessing import Pool
from pathlib import Path
from cdo import Cdo
18
from package.utilities import Timer, cdo_mergetime
Christoph Heim's avatar
Christoph Heim committed
19
from namelist import domain, padding
20
from functions import paste_dir_names
21
22
###############################################################################

23
24
def sellatlon_GEOS(inp_file, out_file, dt, box, options, var_name, var_dict,
                   res):
25
26
    
    TM = Timer()
27
    file_code = '{}km_{}_{:%Y%m%d%H%M}'.format(res, var_name, dt)
28
29
30
    
    if os.path.exists(out_file) and not options['recompute']:
        TM.start('cdo')
31
        print('\t\t'+file_code+' already computed')
32
33
        TM.stop('cdo')
    else:
34
        print('\t\t'+file_code)
35
36
37
38

        # cdo
        TM.start('cdo')
        ofile = cdo.sellonlatbox(
39
40
                    box['lon'].start, box['lon'].stop,
                    box['lat'].start, box['lat'].stop,
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
                    input=(' -sellevidx,'+str(box['vert0'])+'/'+
                           str(box['vert1'])+
                           ' -selname,'+var_dict[var_name]['key']+
                           ' '+inp_file),
                    output=out_file)

        TM.stop('cdo')

    return(TM)


if __name__ == '__main__':

    # GENERAL SETTINGS
    ###########################################################################
    # input and output directories
    raw_data_dir = os.path.join('/work','ka1081','DYAMOND')
    out_base_dir = os.path.join('/work','ka1081','2019_06_Hackathon_Mainz',
59
                                'christoph_heim','newdata')
60

61
62
    # box to subselect
    box = domain
63
64
    #box.update({'vert0':1,'vert1':13}) # top 3km
    box.update({'vert0':1,'vert1':18}) # top 6km
Christoph Heim's avatar
Christoph Heim committed
65
66
    box['lon'] = slice(box['lon'].start - padding, box['lon'].stop + padding)
    box['lat'] = slice(box['lat'].start - padding, box['lat'].stop + padding)
67
68
69
70
71

    # name of model 
    model_name = 'GEOS'

    # variables to extract
72
73
74
75
76
77
78
79
80
    var_names = ['QV', 'QC', 'T', 'H', 'W', 'U', 'V', 'P',
                 'PS', 'U10M', 'V10M', 'T2M',
                 'LWUTOA', 'SWNDTOA',
                 'SLHFLX', 'SSHFLX',
                 'TQC', 'TQI', 'TQV',
                 'PP']
    #var_names = ['PP']
    # missing: SST CLCL CLCT
    #var_names = ['P']
81
82
83
84
85
86
87

    """
    parallel tasks:
    3km
    2D: ok: 18
    3D: ok: 12
    """
88
89
90
91
92
    
    # model resolutions [km] of simulations
    ress = [3]
    
    # date range
Christoph Heim's avatar
Christoph Heim committed
93
    first_date = datetime(2016,8,1)
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    last_date = datetime(2016,9,9)
    #last_date = datetime(2016,8,1)

    #first_date = datetime(2016,8,1)
    #last_date = datetime(2016,8,10)

    #first_date = datetime(2016,8,11)
    #last_date = datetime(2016,8,20)

    #first_date = datetime(2016,8,21)
    #last_date = datetime(2016,8,30)

    #first_date = datetime(2016,8,31)
    #last_date = datetime(2016,9,9)
108
109
110

    # options for computation
    options = {}
111
112
    options['recompute']        = 0
    options['rm_tmp_folder']    = 0
113
114
115
116
117
118
    ###########################################################################


    # GEOS SPECIFIC SETTINGS
    ###########################################################################
    var_dict = {
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
        'QV'        :{'folder':'inst', 'inc':'03hr', 'file':'3d_QV_Mv',     'key':'QV'}, 
        'QC'        :{'folder':'inst', 'inc':'03hr', 'file':'3d_QL_Mv',     'key':'QL'}, 
        'T'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_T_Mv',      'key':'T'}, 
        'H'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_H_Mv',      'key':'H'}, 
        'W'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_W_Mv',      'key':'W'}, 
        'U'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_U_Mv',      'key':'U'}, 
        'V'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_V_Mv',      'key':'V'}, 
        'P'         :{'folder':'inst', 'inc':'03hr', 'file':'3d_P_Mv',      'key':'P'}, 

        'PS'        :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'PS'}, 
        'U10M'      :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'U10M'}, 
        'V10M'      :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'V10M'}, 
        'T2M'       :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'T2M'}, 
        'LWUTOA'    :{'folder':'tavg', 'inc':'15mn', 'file':'2d_flx_Mx',    'key':'OLR'}, 
        'SWNDTOA'   :{'folder':'tavg', 'inc':'15mn', 'file':'2d_flx_Mx',    'key':'SWTNET'}, 
        'SLHFLX'    :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'EFLUX'}, 
        'SSHFLX'    :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'HFLUX'}, 
        'TQC'       :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'CWP'}, 
        'TQI'       :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'IWP'}, 
        'TQV'       :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'TQV'}, 

        'PP'        :{'folder':'inst', 'inc':'15mn', 'file':'2d_asm_Mx',    'key':'PRECTOT'}, 
141
    }
142
143
144
    inc_min = {'15mn':30, '03hr':180}
    #offset_min = {'geosgcm_prog':0, 'geosgcm_conv':0, 'geosgcm_surf':90}
    run_specif_name = ''
145
146
    ###########################################################################

147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176

    ## GEOS SPECIFIC SETTINGS
    ############################################################################
    #var_dict = {
    #    'QV'        :{'file':'geosgcm_prog','key':'QV'}, 
    #    'QC'        :{'file':'geosgcm_prog','key':'QL'}, 
    #    'T'         :{'file':'geosgcm_prog','key':'T'}, 
    #    'H'         :{'file':'geosgcm_prog','key':'H'}, 
    #    'W'         :{'file':'geosgcm_prog','key':'W'}, 

    #    'U10M'      :{'file':'geosgcm_surf','key':'U10M'}, 
    #    'V10M'      :{'file':'geosgcm_surf','key':'V10M'}, 
    #    'T2M'       :{'file':'geosgcm_surf','key':'T2M'}, 
    #    'LWUTOA'    :{'file':'geosgcm_surf','key':'OLR'}, 
    #    'SWDSFC'    :{'file':'geosgcm_surf','key':'SWGDWN'}, 
    #    'SWNDTOA'   :{'file':'geosgcm_surf','key':'SWTNET'}, 
    #    'SLHFLX'    :{'file':'geosgcm_surf','key':'LHFX'}, 
    #    'SSHFLX'    :{'file':'geosgcm_surf','key':'SHFX'}, 
    #    'TQC'       :{'file':'geosgcm_conv','key':'CWP'}, 
    #    'TQI'       :{'file':'geosgcm_conv','key':'IWP'}, 
    #    'PPCONV'    :{'file':'geosgcm_surf','key':'CNPRCP'}, 
    #    'PPGRID'    :{'file':'geosgcm_surf','key':'LSPRCP'}, 
    #    'PPANVI'    :{'file':'geosgcm_surf','key':'ANPRCP'}, 
    #}
    ##inc_min = {'geosgcm_prog':360, 'geosgcm_conv':15, 'geosgcm_surf':180}
    #inc_min = {'geosgcm_prog':360, 'geosgcm_conv':30, 'geosgcm_surf':180}
    #offset_min = {'geosgcm_prog':0, 'geosgcm_conv':0, 'geosgcm_surf':90}
    #run_specif_name = '-MOM_NoDeepCu'
    ############################################################################

177
178
179
180
181
182
183
184
185
186
187
188
189
190
    ## PREPARING STEPS
    TM = Timer()
    cdo = Cdo()

    if len(sys.argv) > 1:
        n_tasks = int(sys.argv[1])
    else:
        n_tasks = 1
    print('Using ' + str(n_tasks) + ' taks.')

    ## EXTRACT VARIABLES FROM SIMULATIONS
    for var_name in var_names:
        print('############## var ' + var_name + ' ##################')

191
192
193
194
195
196
        #dt_range = np.arange(first_date + timedelta(minutes=offset_min[
        #                    var_dict[var_name]['file']]),
        #                    last_date + timedelta(days=1),
        #                    timedelta(minutes=inc_min[
        #                    var_dict[var_name]['file']])).tolist()
        dt_range = np.arange(first_date,
197
198
                            last_date + timedelta(days=1),
                            timedelta(minutes=inc_min[
199
                            var_dict[var_name]['inc']])).tolist()
200
201
202
203
204
205

        for res in ress:
            print('############## res ' + str(res) + ' ##################')

            sim_name = model_name + '-' + str(res) + 'km'+run_specif_name
            inp_dir	= os.path.join(raw_data_dir, sim_name)
206
207
208
209
210
211

            # out_dir = directory for final model output (after mergetime)
            # out_tmp_dir = directory for output of files in time merge
            # level of raw model output
            out_dir, out_tmp_dir = paste_dir_names(out_base_dir,
                                                   model_name, res, domain)
212
            Path(out_dir).mkdir(parents=True, exist_ok=True)
213
214
215
            # remove temporary fils if desired
            if options['rm_tmp_folder'] and os.path.exists(out_tmp_dir):
                shutil.rmtree(out_tmp_dir)
216
217
218
219
220
221
            Path(out_tmp_dir).mkdir(parents=True, exist_ok=True)

            # find times and files that should be extracted
            # and prepare arguments for function
            args = []
            for dt in dt_range:
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
                ## execption, file that does not exist anymore
                #if dt not in [datetime(2016,9,1,22,30),
                #      datetime(2016,9,1,21,15),datetime(2016,9,1,21,30),
                #      datetime(2016,9,1,21,45),datetime(2016,9,1,22,00),
                #      datetime(2016,9,1,22,15),datetime(2016,9,1,22,30),
                #      datetime(2016,9,1,22,45),datetime(2016,9,1,23,00),
                #      datetime(2016,9,1,23,15),datetime(2016,9,1,23,30),
                #      datetime(2016,9,1,23,45)]:


                inp_dir1 = os.path.join(inp_dir,var_dict[var_name]['folder'],
                                        '{}_{}_{}'.format(
                                        var_dict[var_name]['folder'],
                                        var_dict[var_name]['inc'],
                                        var_dict[var_name]['file'],
                                        ))
                inp_file = glob.glob(os.path.join(inp_dir1,
                                    '*{:%Y%m%d_%H%M}z.nc4'.format(dt)))[0]
                #print(inp_file)
                #quit()
                out_file = os.path.join(out_tmp_dir,
                            var_name+'_{:%Y%m%d%H%M}'.format(dt)+'.nc')
                args.append( (inp_file, out_file, dt, box, options, var_name,
                              var_dict, res) )
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260

            # run function serial or parallel
            if n_tasks > 1:
                with Pool(processes=n_tasks) as pool:
                    results = pool.starmap(sellatlon_GEOS, args)
            else:
                results = []
                for arg in args:
                    results.append(sellatlon_GEOS(*arg))

            # collect timings from subtasks
            for task_TM in results:
                TM.merge_timings(task_TM)

            # merge all time step files to one
261
            TM.start('merge')
262
            #cdo_mergetime(out_tmp_dir, out_dir, var_name)
263
            TM.stop('merge')
264

265
    print('ran GEOS at resolutions {} for variables {}'.format(ress, var_names))
266
    TM.print_report()