Skip to content

Add map_variables and -99999 nan value to read_crn #1368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Feb 17, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sphinx/source/whatsnew/v0.9.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`)

Bug fixes
~~~~~~~~~
Expand Down
33 changes: 17 additions & 16 deletions pvlib/iotools/crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,22 @@
]


def read_crn(filename):
"""
Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is
described in [1]_ and [2]_.
def read_crn(filename, map_variables=True):
"""Read a NOAA USCRN fixed-width file into a pandas dataframe.

The CRN network consists of a +100 meteorological stations covering the
U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to
provide long-term measurements of temperature, precipitation, and soil
moisture and temperature. Additionally, global horizontal irradiance (GHI)
is measured using a photodiode pyranometer.

Parameters
----------
filename: str, path object, or file-like
filepath or url to read for the fixed-width file.
map_variables: bool, default: True
When true, renames columns of the Dataframe to pvlib variable names
where applicable. See variable VARIABLE_MAP.

Returns
-------
Expand All @@ -60,10 +67,10 @@ def read_crn(filename):
-----
CRN files contain 5 minute averages labeled by the interval ending
time. Here, missing data is flagged as NaN, rather than the lowest
possible integer for a field (e.g. -999 or -99). Air temperature in
deg C. Wind speed in m/s at a height of 1.5 m above ground level.
possible integer for a field (e.g. -999 or -99). Air temperature is in
deg C and wind speed is in m/s at a height of 1.5 m above ground level.

Variables corresponding to standard pvlib variables are renamed,
Variables corresponding to standard pvlib variables are by default renamed,
e.g. `SOLAR_RADIATION` becomes `ghi`. See the
`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.

Expand Down Expand Up @@ -103,19 +110,13 @@ def read_crn(filename):
dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4),
format='%Y%m%d%H%M', utc=True)
data = data.set_index(dtindex)
try:
# to_datetime(utc=True) does not work in older versions of pandas
data = data.tz_localize('UTC')
except TypeError:
pass
Comment on lines -106 to -110
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too bad I neglected to indicate the versions that needed this. I think the tests are comprehensive enough that we can safely remove this if they still pass.


# Now we can set nans. This could be done a per column basis to be
# safer, since in principle a real -99 value could occur in a -9999
# column. Very unlikely to see that in the real world.
for val in [-99, -999, -9999]:
# consider replacing with .replace([-99, -999, -9999])
data = data.where(data != val, np.nan)
data = data.replace([-99, -999, -9999], np.nan)

data = data.rename(columns=VARIABLE_MAP)
if map_variables:
data = data.rename(columns=VARIABLE_MAP)

return data
26 changes: 21 additions & 5 deletions pvlib/tests/iotools/test_crn.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


@pytest.fixture
def columns():
def columns_mapped():
return [
'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
Expand All @@ -17,6 +17,16 @@ def columns():
'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']


@pytest.fixture
def columns_unmapped():
return [
'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION',
'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE',
'ST_FLAG', 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5',
'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG']


@pytest.fixture
def dtypes():
return [
Expand All @@ -39,7 +49,7 @@ def testfile_problems():
return DATA_DIR / 'CRN_with_problems.txt'


def test_read_crn(testfile, columns, dtypes):
def test_read_crn(testfile, columns_mapped, dtypes):
index = pd.DatetimeIndex(['2019-01-01 16:10:00',
'2019-01-01 16:15:00',
'2019-01-01 16:20:00',
Expand All @@ -54,14 +64,20 @@ def test_read_crn(testfile, columns, dtypes):
0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0],
[53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0,
0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
expected = pd.DataFrame(values, columns=columns, index=index)
expected = pd.DataFrame(values, columns=columns_mapped, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile)
assert_frame_equal(out, expected)


def test_read_crn_problems(testfile_problems, columns, dtypes):
# Test map_variables=False returns correct column names
def test_read_crn_map_variables(testfile, columns_unmapped, dtypes):
out = crn.read_crn(testfile, map_variables=False)
assert (out.columns == columns_unmapped).all()


def test_read_crn_problems(testfile_problems, columns_mapped, dtypes):
# GH1025
index = pd.DatetimeIndex(['2020-07-06 12:00:00',
'2020-07-06 13:10:00'],
Expand All @@ -72,7 +88,7 @@ def test_read_crn_problems(testfile_problems, columns, dtypes):
[92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62,
26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0,
1.64, 0]])
expected = pd.DataFrame(values, columns=columns, index=index)
expected = pd.DataFrame(values, columns=columns_mapped, index=index)
for (col, _dtype) in zip(expected.columns, dtypes):
expected[col] = expected[col].astype(_dtype)
out = crn.read_crn(testfile_problems)
Expand Down