|
5 | 5 |
|
6 | 6 | import contextlib |
7 | 7 | from datetime import datetime, timezone |
| 8 | +import logging |
8 | 9 | import re |
9 | 10 | import string |
10 | 11 |
|
|
16 | 17 |
|
17 | 18 | exporter = Exporter(globals()) |
18 | 19 |
|
| 20 | +log = logging.getLogger(__name__) |
| 21 | + |
19 | 22 |
|
20 | 23 | def _decode_coords(coordinates): |
21 | 24 | """Turn a string of coordinates from WPC coded surface bulletin into a lon/lat tuple. |
@@ -107,44 +110,48 @@ def parse_wpc_surface_bulletin(bulletin, year=None): |
107 | 110 | # A single file may have multiple sets of data that are valid at different times. Set |
108 | 111 | # the valid_time string that will correspond to all the following lines parsed, until |
109 | 112 | # the next valid_time is found. |
110 | | - if parts[0] in ('VALID', 'SURFACE PROG VALID'): |
111 | | - dtstr = parts[-1] |
112 | | - valid_time = valid_time.replace(year=year or valid_time.year, month=int(dtstr[:2]), |
113 | | - day=int(dtstr[2:4]), hour=int(dtstr[4:6]), |
114 | | - minute=0, second=0, microsecond=0) |
115 | | - else: |
116 | | - feature, *info = parts |
117 | | - if feature in {'HIGHS', 'LOWS'}: |
118 | | - # For each pressure center, add its data as a new row |
119 | | - # While ideally these occur in pairs, some bulletins have had multiple |
120 | | - # locations for a single center strength value. So instead walk one at a time |
121 | | - # and keep track of the most recent strength. |
122 | | - strength = np.nan |
123 | | - for item in info: |
124 | | - if len(item) <= 4 and item[0] in {'8', '9', '1'}: |
125 | | - strength = int(item) |
| 113 | + try: |
| 114 | + if parts[0] in ('VALID', 'SURFACE PROG VALID'): |
| 115 | + dtstr = parts[-1] |
| 116 | + valid_time = valid_time.replace(year=year or valid_time.year, |
| 117 | + month=int(dtstr[:2]), day=int(dtstr[2:4]), |
| 118 | + hour=int(dtstr[4:6]), minute=0, second=0, |
| 119 | + microsecond=0) |
| 120 | + else: |
| 121 | + feature, *info = parts |
| 122 | + if feature in {'HIGHS', 'LOWS'}: |
| 123 | + # For each pressure center, add its data as a new row |
| 124 | + # While ideally these occur in pairs, some bulletins have had multiple |
| 125 | + # locations for a single center strength value. So instead walk one at a |
| 126 | + # time and keep track of the most recent strength. |
| 127 | + strength = np.nan |
| 128 | + for item in info: |
| 129 | + if len(item) <= 4 and item[0] in {'8', '9', '1'}: |
| 130 | + strength = int(item) |
| 131 | + else: |
| 132 | + parsed_text.append((valid_time, feature.rstrip('S'), strength, |
| 133 | + Point(_decode_coords(item)))) |
| 134 | + elif feature in {'WARM', 'COLD', 'STNRY', 'OCFNT', 'TROF'}: |
| 135 | + # Some bulletins include 'WK', 'MDT', or 'STG' to indicate the front's |
| 136 | + # strength. If present, separate it from the rest of the info, which gives |
| 137 | + # the position of the front. |
| 138 | + if info[0][0] in string.ascii_letters: |
| 139 | + strength, *boundary = info |
126 | 140 | else: |
127 | | - parsed_text.append((valid_time, feature.rstrip('S'), strength, |
128 | | - Point(_decode_coords(item)))) |
129 | | - elif feature in {'WARM', 'COLD', 'STNRY', 'OCFNT', 'TROF'}: |
130 | | - # Some bulletins include 'WK', 'MDT', or 'STG' to indicate the front's |
131 | | - # strength. If present, separate it from the rest of the info, which gives the |
132 | | - # position of the front. |
133 | | - if info[0][0] in string.ascii_letters: |
134 | | - strength, *boundary = info |
135 | | - else: |
136 | | - strength, boundary = np.nan, info |
137 | | - |
138 | | - # Create a list of Points and create Line from points, if possible |
139 | | - boundary = [Point(_decode_coords(point)) for point in boundary] |
140 | | - boundary = LineString(boundary) if len(boundary) > 1 else boundary[0] |
141 | | - |
142 | | - # Add new row in the data for each front |
143 | | - parsed_text.append((valid_time, feature, strength, boundary)) |
144 | | - # Look for a year at the end of the line (from the product header) |
145 | | - elif (year is None and len(info) >= 2 and re.match(r'\d{4}', info[-1]) |
146 | | - and re.match(r'\d{2}', info[-2])): |
147 | | - with contextlib.suppress(ValueError): |
148 | | - year = int(info[-1]) |
| 141 | + strength, boundary = np.nan, info |
| 142 | + |
| 143 | + # Create a list of Points and create Line from points, if possible |
| 144 | + boundary = [Point(_decode_coords(point)) for point in boundary] |
| 145 | + boundary = LineString(boundary) if len(boundary) > 1 else boundary[0] |
| 146 | + |
| 147 | + # Add new row in the data for each front |
| 148 | + parsed_text.append((valid_time, feature, strength, boundary)) |
| 149 | + # Look for a year at the end of the line (from the product header) |
| 150 | + elif (year is None and len(info) >= 2 and re.match(r'\d{4}', info[-1]) |
| 151 | + and re.match(r'\d{2}', info[-2])): |
| 152 | + with contextlib.suppress(ValueError): |
| 153 | + year = int(info[-1]) |
| 154 | + except ValueError: |
| 155 | + log.warning('Could not parse: %s', ' '.join(parts)) |
149 | 156 |
|
150 | 157 | return pd.DataFrame(parsed_text, columns=['valid', 'feature', 'strength', 'geometry']) |
0 commit comments