Manipulating weather data with Julia

Here is a tiny example of data manipulation in Julia using the DataFrames.jl and DataFramesMeta.jl packages. The data come from a weather station in Montpellier.

using DataFrames, CSV, Downloads, Dates
using DataFramesMeta, StatsBase
using StatsPlots

# Daily temperature file for station 2207, hosted in the StochasticWeatherGenerator.jl repository.
url = "https://raw.githubusercontent.com/dmetivie/StochasticWeatherGenerator.jl/master/weather_files/TX_STAID002207.txt"
# `Downloads.download` stores the file locally and returns the path it was saved to.
http_response = Downloads.download(url)
# Column names are taken from line 21 of the file; DATE is parsed from `yyyymmdd` into a `Date`.
df = CSV.read(
    http_response,
    DataFrame;
    header = 21,
    normalizenames = true,
    types = Dict(:DATE => Date),
    dateformat = "yyyymmdd",
)
df[1:20, :] # preview the first 20 rows
20×5 DataFrame
 Row │ STAID  SOUID  DATE        TX     Q_TX
     │ Int64  Int64  Date        Int64  Int64
─────┼────────────────────────────────────────
   1 │  2207   6448  1946-01-01     58      0
   2 │  2207   6448  1946-01-02     96      0
   3 │  2207   6448  1946-01-03     95      0
   4 │  2207   6448  1946-01-04     76      0
   5 │  2207   6448  1946-01-05     70      0
   6 │  2207   6448  1946-01-06    108      0
   7 │  2207   6448  1946-01-07    110      0
   8 │  2207   6448  1946-01-08     90      0
   9 │  2207   6448  1946-01-09    130      0
  10 │  2207   6448  1946-01-10    133      0
  11 │  2207   6448  1946-01-11    146      0
  12 │  2207   6448  1946-01-12    150      0
  13 │  2207   6448  1946-01-13    130      0
  14 │  2207   6448  1946-01-14     80      0
  15 │  2207   6448  1946-01-15     32      0
  16 │  2207   6448  1946-01-16     44      0
  17 │  2207   6448  1946-01-17     62      0
  18 │  2207   6448  1946-01-18     71      0
  19 │  2207   6448  1946-01-19     97      0
  20 │  2207   6448  1946-01-20     70      0

Monthly mean and standard deviation of daily temperature (in °C)

factor = 0.1 # TX is stored in tenths of °C; multiplying by this gives °C
df_month = @chain df begin
    @rsubset(:Q_TX != 9) # drop rows flagged as missing (Q_TX == 9)
    @rtransform(:MONTH = month(:DATE)) # calendar month of each observation
    @by :MONTH begin # one row per month: mean and standard deviation of TX, in °C
        :MONTHLY_MEAN = factor * mean(:TX)
        :MONTHLY_STD = factor * std(:TX)
    end
end
12×3 DataFrame
 Row │ MONTH  MONTHLY_MEAN  MONTHLY_STD
     │ Int64  Float64       Float64
─────┼──────────────────────────────────
   1 │     1       11.4453      3.61758
   2 │     2       12.7332      3.67788
   3 │     3       15.4664      3.41305
   4 │     4       18.0968      3.24402
   5 │     5       21.7896      3.4346
   6 │     6       26.0935      3.49832
   7 │     7       29.0241      3.00726
   8 │     8       28.3806      2.90772
   9 │     9       24.9303      2.98281
  10 │    10       20.4157      3.1212
  11 │    11       15.2892      3.34882
  12 │    12       12.1186      3.40215

Plotting with StatsPlots.jl

begin
    # Build the x labels from the MONTH column itself (rather than a hard-coded 1:12)
    # so each label stays attached to its own row, whatever the row order or count.
    # The ribbon shows ± one standard deviation around the monthly mean.
    @df df_month plot(
        monthabbr.(:MONTH),
        :MONTHLY_MEAN,
        ribbon = :MONTHLY_STD,
        label = "Mean Temperature",
    )
    ylabel!("T(°C)")
end
Monthly temperature

Global warming trend

Here we compare the yearly mean temperature with a simple linear trend to illustrate global warming.

df_trend = @chain df begin
    @rsubset(:Q_TX != 9) # drop rows flagged as missing (Q_TX == 9)
    @rtransform(:YEAR = year(:DATE)) # calendar year of each observation
    @rsubset(:YEAR < 2025) # 2025 is incomplete (the data stops in July), so leave it out
    @by(:YEAR, :YEARLY_MEAN = factor * mean(:TX)) # one row per year, mean in °C
end;

For illustration, we consider a simple trend fitted "by hand": flat until 1980, then linear. Of course, one could use optimization to select the offset and slope, or consider other forms of trend.

# Hand-fitted trend for year `x`: flat at 19 °C up to 1980, then +0.05 °C per year,
# i.e. ≃2°C warming in 40 years in Montpellier.
f_trend(x) = 19 + 0.05 * max(0, x - 1980)

begin
    # Observed yearly means.
    @df df_trend plot(
        :YEAR,
        :YEARLY_MEAN;
        label = "Yearly mean temperature trend",
    )
    # Hand-fitted trend evaluated at the same years, drawn on the same axes.
    @df df_trend plot!(
        :YEAR,
        f_trend.(:YEAR);
        label = "Linear warming trend",
    )
    ylabel!("T(°C)")
end
Yearly temperature