Julia: Manipulation of a weather DataFrame

using DataFrames, CSV, Downloads, Dates
# Station data file hosted in the StochasticWeatherGenerator.jl GitHub repository.
url = "https://raw.githubusercontent.com/dmetivie/StochasticWeatherGenerator.jl/master/weather_files/TX_STAID000031.txt"

# `Downloads.download(url)` saves the file to a temporary location and returns
# the local path to it (not an HTTP response object).
file_path = Downloads.download(url)

# Parse the file into a DataFrame:
#   - lines starting with `#` are skipped as comments,
#   - column names are normalized into valid Julia identifiers,
#   - the DATE column is parsed as a `Date` using the `yyyymmdd` format.
df = CSV.read(
    file_path,
    DataFrame;
    comment="#",
    normalizenames=true,
    dateformat="yyyymmdd",
    types=Dict(:DATE => Date),
)

# Preview the first rows; unlike `df[1:20, :]`, `first` does not throw when the
# table holds fewer than 20 rows.
first(df, 20)
20×5 DataFrame
 Row │ STAID  SOUID  DATE        TX     Q_TX
     │ Int64  Int64  Date        Int64  Int64
─────┼────────────────────────────────────────
   1 │    31   4895  1897-01-01    116      0
   2 │    31   4895  1897-01-02    123      0
   3 │    31   4895  1897-01-03    130      0
   4 │    31   4895  1897-01-04    102      0
   5 │    31   4895  1897-01-05    140      0
   6 │    31   4895  1897-01-06    116      0
   7 │    31   4895  1897-01-07    155      0
   8 │    31   4895  1897-01-08    151      0
   9 │    31   4895  1897-01-09    147      0
  10 │    31   4895  1897-01-10    158      0
  11 │    31   4895  1897-01-11    127      0
  12 │    31   4895  1897-01-12    140      0
  13 │    31   4895  1897-01-13    124      0
  14 │    31   4895  1897-01-14    119      0
  15 │    31   4895  1897-01-15    155      0
  16 │    31   4895  1897-01-16    170      0
  17 │    31   4895  1897-01-17    116      0
  18 │    31   4895  1897-01-18     83      0
  19 │    31   4895  1897-01-19    117      0
  20 │    31   4895  1897-01-20    129      0
using DataFramesMeta, StatsBase
factor = 0.1 # conversion factor from the stored integer TX values to °C

# Monthly climatology: one row per calendar month with the mean and standard
# deviation of TX (converted to °C) over all valid days of that month.
df_month = @chain df begin
    @subset(:Q_TX .!= 9) # drop the records whose quality flag marks them as missing
    @transform(:MONTH = month.(:DATE)) # add a calendar-month column
    # group by MONTH and take the mean/std of TX within each group
    @by(:MONTH, :MONTHLY_MEAN = mean(:TX) * factor, :MONTHLY_STD = std(:TX) * factor)
    # The order of groups is not guaranteed by default, so sort explicitly:
    # code that pairs rows with month names positionally needs January..December.
    @orderby(:MONTH)
end
12×3 DataFrame
 Row │ MONTH  MONTHLY_MEAN  MONTHLY_STD
     │ Int64  Float64       Float64
─────┼──────────────────────────────────
   1 │     1       11.4007      3.46532
   2 │     2       12.6008      3.58906
   3 │     3       15.3331      3.15435
   4 │     4       18.1409      3.08704
   5 │     5       22.1839      3.35553
   6 │     6       25.7999      3.08702
   7 │     7       28.5384      2.88103
   8 │     8       28.0754      2.74896
   9 │     9       24.7867      2.93797
  10 │    10       20.1743      3.28496
  11 │    11       15.1703      3.34484
  12 │    12       12.0719      3.3662
using StatsPlots
# Plot the monthly mean with a ±1 standard-deviation ribbon.
# The x labels are built from the MONTH column itself rather than from a
# hard-coded 1:12, so every label stays attached to its own row even if the
# table is not in calendar order or a month is absent.
@df df_month plot(
    monthabbr.(:MONTH),
    :MONTHLY_MEAN,
    ribbon = :MONTHLY_STD,
    label = "Mean Temperature",
)
ylabel!("T(°C)")
CC BY-SA 4.0 David Métivier. Last modified: October 16, 2024. Website built with Franklin.jl and the Julia programming language.