% Liu and Ker (2019), "Is There Too Much History in Historical Yield Data".
% Record 283559 on AgEcon Search (see the url and doi fields).
%
% Field notes:
%   - doi holds the bare DOI; styles add the resolver prefix themselves.
%   - pubdate is a non-standard field (ignored by styles) that preserves the
%     full ISO date the export had placed in the address field.
%   - recid is the repository record id; also non-standard and ignored.
%
% NOTE(review): the entry type is article but no journal is given, which
%   triggers a missing-field warning. The number field looks like a
%   report/series identifier, so this is presumably a working or conference
%   paper; switch to techreport (with institution) once the issuing body is
%   confirmed.
% NOTE(review): pages = 33 is presumably the total page count rather than a
%   page range; if so, biblatex users should move it to pagetotal. Confirm
%   against the record before changing.
@article{Yong:283559,
      recid    = {283559},
      author   = {Liu, Yong and Ker, Alan P.},
      title    = {Is There Too Much History in Historical Yield Data},
      number   = {1621-2019-294},
      pages    = {33},
      month    = jan,
      year     = {2019},
      pubdate  = {2019-01-30},
      abstract = {County crop yield data from United States Department of  Agriculture - National Agricultural Statistics Service has  and continues to be extensively used in the literature as  well as practice. The most notable practical example is  crop insurance, as the Risk Management Agency uses the data  to set guarantees, estimate premium rates, and calculate  indemnities for their area programs. In many applications  including crop insurance, yield data are detrended and  adjusted for possible heteroscedasticity and then assumed  to be independent and identically distributed. For most  major crop-region combinations, county yield data exist  from the 1950s onwards and reflect very significant  innovations in both seed and farm management technologies;  innovations that have likely moved mass all around the  support of the yield distribution. Despite correcting for  movements in the first two moments of the yield data  generating process (dgp), these innovations raise doubt  regarding the identically distributed assumption. This  manuscript considers the question of how much historical  yield data should be used in empirical analyses. The answer  is obviously dependent on the empirical application,  crop-region combination, econometric methodology, and  chosen loss function.  Nonetheless, we hope to provide some  guidance by tackling this question in three ways. First, we  use distributional tests to assess if and when the adjusted  yield data may result from different dgps. Second, we  consider the application to crop insurance by using an  out-of-sample rating game -- commonly employed in the  literature -- to compare rates from the full versus  historically restricted data sets. Third, we estimate  flexible time-varying dgps and then simulate to quantify  the additional error when the identically distributed  assumption is erroneously imposed. 
Our findings suggest  that despite accounting for time-varying movements in the  first two moments, using yield data more than 30 years past  increases estimation error.  Given that discarding  historical data is unappetizing, particularly so in  applications with relatively small T, we investigate three  methodologies that re-incorporate the discarded data while  explicitly acknowledging: (i) the retained and discarded  data are from different dgps; and (ii) the extent and form  of those differences is unknown. Our results suggest gains  in efficiency may be realized by using these more flexible  methodologies.  While our results are most applicable to  the crop insurance literature, we suggest proceeding with  caution when using historical yield data in other  applications as well.},
      url      = {http://ageconsearch.umn.edu/record/283559},
      doi      = {10.22004/ag.econ.283559},
}