pvlib · kandersolar · Feb 17, 2022 · Jan 5, 2022 · Jan 6, 2022 · Jan 7, 2022
diff --git a/docs/sphinx/source/whatsnew/v0.9.1.rst b/docs/sphinx/source/whatsnew/v0.9.1.rst
@@ -11,6 +11,7 @@ Deprecations
 
 Enhancements
 ~~~~~~~~~~~~
+* Added ``map_variables`` option to :func:`~pvlib.iotools.read_crn` (:pull:`1368`)
 
 Bug fixes
 ~~~~~~~~~

diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py
@@ -40,15 +40,22 @@
 ]
 
 
-def read_crn(filename):
-    """
-    Read a NOAA USCRN fixed-width file into pandas dataframe.  The CRN is
-    described in [1]_ and [2]_.
+def read_crn(filename, map_variables=True):
+    """Read a NOAA USCRN fixed-width file into a pandas dataframe.
+
+    The CRN consists of more than 100 meteorological stations covering the
+    U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to
+    provide long-term measurements of temperature, precipitation, and soil
+    moisture and temperature. Additionally, global horizontal irradiance (GHI)
+    is measured using a photodiode pyranometer.
 
     Parameters
     ----------
     filename: str, path object, or file-like
         filepath or url to read for the fixed-width file.
+    map_variables: bool, default: True
+        When true, renames columns of the DataFrame to pvlib variable names
+        where applicable. See variable VARIABLE_MAP.
 
     Returns
     -------
@@ -60,10 +67,10 @@ def read_crn(filename):
     -----
     CRN files contain 5 minute averages labeled by the interval ending
     time. Here, missing data is flagged as NaN, rather than the lowest
-    possible integer for a field (e.g. -999 or -99). Air temperature in
-    deg C. Wind speed in m/s at a height of 1.5 m above ground level.
+    possible integer for a field (e.g. -999 or -99). Air temperature is in
+    deg C and wind speed is in m/s at a height of 1.5 m above ground level.
 
-    Variables corresponding to standard pvlib variables are renamed,
+    Variables corresponding to standard pvlib variables are renamed by default,
     e.g. `SOLAR_RADIATION` becomes `ghi`. See the
     `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
 
@@ -103,19 +110,13 @@ def read_crn(filename):
     dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4),
                              format='%Y%m%d%H%M', utc=True)
     data = data.set_index(dtindex)
-    try:
-        # to_datetime(utc=True) does not work in older versions of pandas
-        data = data.tz_localize('UTC')
-    except TypeError:
-        pass
 
     # Now we can set nans. This could be done a per column basis to be
     # safer, since in principle a real -99 value could occur in a -9999
     # column. Very unlikely to see that in the real world.
-    for val in [-99, -999, -9999]:
-        # consider replacing with .replace([-99, -999, -9999])
-        data = data.where(data != val, np.nan)
+    data = data.replace([-99, -999, -9999], np.nan)
 
-    data = data.rename(columns=VARIABLE_MAP)
+    if map_variables:
+        data = data.rename(columns=VARIABLE_MAP)
 
     return data
diff --git a/pvlib/tests/iotools/test_crn.py b/pvlib/tests/iotools/test_crn.py
@@ -7,7 +7,7 @@
 
 
 @pytest.fixture
-def columns():
+def columns_mapped():
     return [
         'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
         'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
@@ -17,6 +17,16 @@ def columns():
         'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']
 
 
+@pytest.fixture
+def columns_unmapped():
+    return [
+        'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
+        'LONGITUDE', 'LATITUDE', 'AIR_TEMPERATURE', 'PRECIPITATION',
+        'SOLAR_RADIATION', 'SR_FLAG', 'SURFACE_TEMPERATURE', 'ST_TYPE',
+        'ST_FLAG', 'RELATIVE_HUMIDITY', 'RH_FLAG', 'SOIL_MOISTURE_5',
+        'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'WIND_1_5', 'WIND_FLAG']
+
+
 @pytest.fixture
 def dtypes():
     return [
@@ -39,7 +49,7 @@ def testfile_problems():
     return DATA_DIR / 'CRN_with_problems.txt'
 
 
-def test_read_crn(testfile, columns, dtypes):
+def test_read_crn(testfile, columns_mapped, dtypes):
     index = pd.DatetimeIndex(['2019-01-01 16:10:00',
                               '2019-01-01 16:15:00',
                               '2019-01-01 16:20:00',
@@ -54,14 +64,20 @@ def test_read_crn(testfile, columns, dtypes):
          0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0],
         [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0,
          0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
-    expected = pd.DataFrame(values, columns=columns, index=index)
+    expected = pd.DataFrame(values, columns=columns_mapped, index=index)
     for (col, _dtype) in zip(expected.columns, dtypes):
         expected[col] = expected[col].astype(_dtype)
     out = crn.read_crn(testfile)
     assert_frame_equal(out, expected)
 
 
-def test_read_crn_problems(testfile_problems, columns, dtypes):
+# Test map_variables=False returns correct column names
+def test_read_crn_map_variables(testfile, columns_unmapped, dtypes):
+    out = crn.read_crn(testfile, map_variables=False)
+    assert (out.columns == columns_unmapped).all()
+
+
+def test_read_crn_problems(testfile_problems, columns_mapped, dtypes):
     # GH1025
     index = pd.DatetimeIndex(['2020-07-06 12:00:00',
                               '2020-07-06 13:10:00'],
@@ -72,7 +88,7 @@ def test_read_crn_problems(testfile_problems, columns, dtypes):
         [92821, 20200706, 1310, 20200706, 810, '2.623', -80.69, 28.62,
          26.9, 0.0, 430.0, 0, 30.2, 'C', 0, 87.0, 0, nan, nan, 989, 0,
          1.64, 0]])
-    expected = pd.DataFrame(values, columns=columns, index=index)
+    expected = pd.DataFrame(values, columns=columns_mapped, index=index)
     for (col, _dtype) in zip(expected.columns, dtypes):
         expected[col] = expected[col].astype(_dtype)
     out = crn.read_crn(testfile_problems)