opensky-statistics/src/main.Rmd

---
title: "Topic 8"
output:
  pdf_document: default
  html_document: default
date: "`r Sys.Date()`"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r preamble, message=FALSE}
# Load Libraries
library(dplyr)
library(lubridate)
library(readr)
library(utils)
library(openSkies)
library(dotenv)
library(httr)
library(jsonlite)
library(trajr)
```

# Download flights
```{r opensky}

time_now <- Sys.time()
creds <- getCredentials(
  client_id = Sys.getenv('OPENSKY_CLIENT_ID'),
  client_secret = Sys.getenv('OPENSKY_CLIENT_SECRET'))

# get departures from Frankfurt airport
departures <- getAirportDepartures(airport = "EDDF", startTime = time_now - hours(1), endTime = time_now, credentials = creds )

getFlights <- function(icao, time, creds){
  flights <-getAircraftFlights(icao, startTime =  time - days(1), endTime = time, credentials = creds )
  return(flights)
}
icao <- departures[[1]][["ICAO24"]]
flights <- getFlights(icao,Sys.time(), creds)
# TODO map from all available flights to tracks
query <- list(icao24= icao, time=as.numeric(flights[[1]][["departure_time"]]))

response <-makeAuthenticatedRequest('tracks/all',query, creds)
track_data <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
if (!is.null(track_data$path) && length(track_data$path) > 0) {

    route_df <- as.data.frame(track_data$path)
    colnames(route_df) <- c("time", "lat", "lon", "alt", "heading", "on_ground")

    message("Loading of route successfull! Points: ", nrow(route_df))

    plot(route_df$lon, route_df$lat, type="o", pch=20, col="blue",
         main=paste("Geographic route of", icao),
         xlab="Longitude", ylab="Latitude")

    plot(route_df$time, route_df$alt, type="l", col="red", lwd=2,
         main=paste("Altitude profile of", icao),
         xlab="Time (Unix)", ylab="Height (Meter)")

  } else {
    print("No path points from api")
  }
```

# Trajectory Characteristics Analysis
```{r trajectory-analysis}
if (exists("route_df") && nrow(route_df) > 1) {

  # Convert lat/lon to approximate meters (using simple equirectangular projection)
  # Reference point: first coordinate
  lat_ref <- route_df$lat[1]
  lon_ref <- route_df$lon[1]

  # Convert to meters (approximate)
  meters_per_deg_lat <- 111320
  meters_per_deg_lon <- 111320 * cos(lat_ref * pi / 180)

  x_meters <- (route_df$lon - lon_ref) * meters_per_deg_lon
  y_meters <- (route_df$lat - lat_ref) * meters_per_deg_lat
  time_seconds <- route_df$time - route_df$time[1]

  # Create trajr trajectory object
  trj <- TrajFromCoords(
    data.frame(x = x_meters, y = y_meters, time = time_seconds),
    xCol = "x", yCol = "y", timeCol = "time"
  )

  # Calculate trajectory characteristics

  # 1. Duration of travel (seconds)
  duration <- TrajDuration(trj)

  # 2. Total path length (meters)
  path_length <- TrajLength(trj)

  # 3. Diffusion distance (net displacement - straight line from start to end)
  diffusion_distance <- TrajDistance(trj)

  # 4. Straightness index (ratio of net displacement to path length, 0-1)
  straightness <- TrajStraightness(trj)

  # 5. Mean travel velocity (meters/second)
  mean_velocity <- path_length / duration

  # 6. Fractal dimension (using divider method)
  # Note: requires sufficient points for accurate estimation
  fractal_dim <- tryCatch({
    # Calculate appropriate step sizes based on trajectory length
    min_step <- TrajLength(trj) / 100
    max_step <- TrajLength(trj) / 2
    step_sizes <- exp(seq(log(min_step), log(max_step), length.out = 10))

    TrajFractalDimension(trj, stepSizes = step_sizes)
  }, error = function(e) {
    message("Could not calculate fractal dimension: ", e$message)
    NA
  })

  # Create summary data frame
  trajectory_characteristics <- data.frame(
    Parameter = c(
      "Duration of Travel (s)",
      "Duration of Travel (min)",
      "Path Length (m)",
      "Path Length (km)",
      "Diffusion Distance (m)",
      "Diffusion Distance (km)",
      "Straightness Index",
      "Mean Travel Velocity (m/s)",
      "Mean Travel Velocity (km/h)",
      "Fractal Dimension"
    ),
    Value = c(
      round(duration, 2),
      round(duration / 60, 2),
      round(path_length, 2),
      round(path_length / 1000, 2),
      round(diffusion_distance, 2),
      round(diffusion_distance / 1000, 2),
      round(straightness, 4),
      round(mean_velocity, 2),
      round(mean_velocity * 3.6, 2),
      round(fractal_dim, 4)
    )
  )

  print(trajectory_characteristics)

  # Visualize the trajectory using trajr
  plot(trj, main = paste("Trajectory of", icao))

} else {
  message("No valid trajectory data available for analysis")
}
```

# Statistical Analysis of Multiple Trajectories
```{r multi-trajectory-analysis}
# Function to calculate trajectory characteristics for a single flight
calculate_trajectory_params <- function(icao24, departure_time, creds) {
  tryCatch({
    query <- list(icao24 = icao24, time = as.numeric(departure_time))
    response <- makeAuthenticatedRequest('tracks/all', query, creds)

    # Check for HTTP errors
    if (httr::status_code(response) != 200) {
      return(NULL)
    }

    track_data <- fromJSON(content(response, as = "text", encoding = "UTF-8"))

    if (is.null(track_data$path) || length(track_data$path) < 2) {
      return(NULL)
    }

    route_df <- as.data.frame(track_data$path)
    colnames(route_df) <- c("time", "lat", "lon", "alt", "heading", "on_ground")

    if (nrow(route_df) < 3) return(NULL)

    # Convert to meters
    lat_ref <- route_df$lat[1]
    lon_ref <- route_df$lon[1]
    meters_per_deg_lat <- 111320
    meters_per_deg_lon <- 111320 * cos(lat_ref * pi / 180)

    x_meters <- (route_df$lon - lon_ref) * meters_per_deg_lon
    y_meters <- (route_df$lat - lat_ref) * meters_per_deg_lat
    time_seconds <- route_df$time - route_df$time[1]

    trj <- TrajFromCoords(
      data.frame(x = x_meters, y = y_meters, time = time_seconds),
      xCol = "x", yCol = "y", timeCol = "time"
    )

    # Calculate parameters
    duration <- TrajDuration(trj)
    path_length <- TrajLength(trj)
    diffusion_dist <- TrajDistance(trj)
    straight <- TrajStraightness(trj)
    mean_vel <- path_length / duration

    # Fractal dimension
    fractal <- tryCatch({
      min_step <- path_length / 100
      max_step <- path_length / 2
      if (min_step > 0 && max_step > min_step) {
        step_sizes <- exp(seq(log(min_step), log(max_step), length.out = 10))
        TrajFractalDimension(trj, stepSizes = step_sizes)
      } else {
        NA
      }
    }, error = function(e) NA)

    return(data.frame(
      icao24 = icao24,
      diffusion_distance_km = diffusion_dist / 1000,
      straightness = straight,
      duration_min = duration / 60,
      mean_velocity_kmh = mean_vel * 3.6,
      fractal_dimension = fractal
    ))

  }, error = function(e) {
    message("Error processing ", icao24, ": ", e$message)
    return(NULL)
  })
}

# Collect trajectory data from multiple departures
message("Collecting trajectory data from departures...")
all_trajectories <- list()

# Process available departures (limit to avoid API rate limits)
n_departures <- min(length(departures), 20)

for (i in 1:n_departures) {
  dep <- departures[[i]]
  icao24 <- dep[["ICAO24"]]
  dep_time <- dep[["departure_time"]]  # Use departure time directly from departures list

  # Skip if no departure time available
  if (is.null(dep_time)) {
    message("Skipping ", icao24, ": no departure time")
    next
  }

  params <- calculate_trajectory_params(icao24, dep_time, creds)
  if (!is.null(params)) {
    all_trajectories[[length(all_trajectories) + 1]] <- params
    message("Successfully processed trajectory for ", icao24)
  }

  Sys.sleep(0.5)  # Rate limiting
}

# Combine all trajectory data
if (length(all_trajectories) > 0) {
  trajectory_stats_df <- do.call(rbind, all_trajectories)
  message("Successfully collected ", nrow(trajectory_stats_df), " trajectories")
  print(trajectory_stats_df)
} else {
  message("No trajectory data collected")
}
```

# Basic Statistical Analysis of Trajectory Parameters
```{r statistical-analysis}
if (exists("trajectory_stats_df") && nrow(trajectory_stats_df) >= 2) {

  # Parameters to analyze
  params_to_analyze <- c("diffusion_distance_km", "straightness", "duration_min",
                         "mean_velocity_kmh", "fractal_dimension")
  param_labels <- c("Diffusion Distance (km)", "Straightness Index",
                    "Duration (min)", "Mean Velocity (km/h)", "Fractal Dimension")

  # Function to calculate comprehensive statistics
  calc_stats <- function(x, param_name) {
    x <- x[!is.na(x)]
    if (length(x) < 2) return(NULL)

    data.frame(
      Parameter = param_name,
      N = length(x),
      Mean = round(mean(x), 4),
      Variance = round(var(x), 4),
      Std_Dev = round(sd(x), 4),
      Min = round(min(x), 4),
      Q1 = round(quantile(x, 0.25), 4),
      Median = round(median(x), 4),
      Q3 = round(quantile(x, 0.75), 4),
      Max = round(max(x), 4),
      IQR = round(IQR(x), 4)
    )
  }

  # Calculate statistics for each parameter
  stats_list <- list()
  for (i in seq_along(params_to_analyze)) {
    param <- params_to_analyze[i]
    label <- param_labels[i]
    stats_list[[i]] <- calc_stats(trajectory_stats_df[[param]], label)
  }

  # Combine into summary table
  stats_summary <- do.call(rbind, stats_list[!sapply(stats_list, is.null)])

  cat("\n========== STATISTICAL SUMMARY ==========\n\n")
  print(stats_summary, row.names = FALSE)

  # Boxplots for each parameter
  par(mfrow = c(2, 3))

  for (i in seq_along(params_to_analyze)) {
    param <- params_to_analyze[i]
    label <- param_labels[i]
    data <- trajectory_stats_df[[param]][!is.na(trajectory_stats_df[[param]])]

    if (length(data) >= 2) {
      boxplot(data,
              main = paste("Boxplot:", label),
              ylab = label,
              col = "lightblue",
              border = "darkblue")

      # Add mean point
      points(1, mean(data), pch = 18, col = "red", cex = 1.5)
    }
  }

  par(mfrow = c(1, 1))

  # Density plots for each parameter
  par(mfrow = c(2, 3))

  for (i in seq_along(params_to_analyze)) {
    param <- params_to_analyze[i]
    label <- param_labels[i]
    data <- trajectory_stats_df[[param]][!is.na(trajectory_stats_df[[param]])]

    if (length(data) >= 3) {
      dens <- density(data, na.rm = TRUE)
      plot(dens,
           main = paste("Density:", label),
           xlab = label,
           ylab = "Density",
           col = "darkblue",
           lwd = 2)
      polygon(dens, col = rgb(0, 0, 1, 0.3), border = "darkblue")

      # Add vertical lines for mean and median
      abline(v = mean(data), col = "red", lwd = 2, lty = 2)
      abline(v = median(data), col = "green", lwd = 2, lty = 3)
      legend("topright", legend = c("Mean", "Median"),
             col = c("red", "green"), lty = c(2, 3), lwd = 2, cex = 0.7)
    }
  }

  par(mfrow = c(1, 1))

  # Histogram with density overlay
  par(mfrow = c(2, 3))

  for (i in seq_along(params_to_analyze)) {
    param <- params_to_analyze[i]
    label <- param_labels[i]
    data <- trajectory_stats_df[[param]][!is.na(trajectory_stats_df[[param]])]

    if (length(data) >= 3) {
      hist(data,
           probability = TRUE,
           main = paste("Histogram:", label),
           xlab = label,
           col = "lightgray",
           border = "darkgray")

      # Overlay density curve
      lines(density(data), col = "red", lwd = 2)
    }
  }

  par(mfrow = c(1, 1))

} else {
  message("Insufficient trajectory data for statistical analysis (need at least 2 trajectories)")
}
```

# Interpretation of Results
```{r interpretation}
if (exists("trajectory_stats_df") && nrow(trajectory_stats_df) >= 2) {

  cat("\n========== INTERPRETATION OF TRAJECTORY PARAMETERS ==========\n\n")

  # Diffusion Distance
  dd <- trajectory_stats_df$diffusion_distance_km[!is.na(trajectory_stats_df$diffusion_distance_km)]
  if (length(dd) >= 2) {
    cat("1. DIFFUSION DISTANCE (Net Displacement):\n")
    cat("   - Mean:", round(mean(dd), 2), "km\n")
    cat("   - This represents the straight-line distance from origin to destination.\n")
    cat("   - High variance (", round(var(dd), 2), ") indicates diverse flight distances.\n\n")
  }

  # Straightness
  st <- trajectory_stats_df$straightness[!is.na(trajectory_stats_df$straightness)]
  if (length(st) >= 2) {
    cat("2. STRAIGHTNESS INDEX:\n")
    cat("   - Mean:", round(mean(st), 4), "(range 0-1, where 1 = perfectly straight)\n")
    cat("   - Values close to 1 indicate efficient, direct flight paths.\n")
    cat("   - Lower values suggest deviations due to weather, airspace, or routing.\n\n")
  }

  # Duration
  dur <- trajectory_stats_df$duration_min[!is.na(trajectory_stats_df$duration_min)]
  if (length(dur) >= 2) {
    cat("3. DURATION OF TRAVEL:\n")
    cat("   - Mean:", round(mean(dur), 2), "minutes\n")
    cat("   - Range:", round(min(dur), 2), "-", round(max(dur), 2), "minutes\n")
    cat("   - IQR:", round(IQR(dur), 2), "minutes (middle 50% of flights)\n\n")
  }

  # Velocity
  vel <- trajectory_stats_df$mean_velocity_kmh[!is.na(trajectory_stats_df$mean_velocity_kmh)]
  if (length(vel) >= 2) {
    cat("4. MEAN TRAVEL VELOCITY:\n")
    cat("   - Mean:", round(mean(vel), 2), "km/h\n")
    cat("   - Typical commercial aircraft cruise: 800-900 km/h\n")
    cat("   - Lower values may include taxi, takeoff, and landing phases.\n\n")
  }

  # Fractal Dimension
  fd <- trajectory_stats_df$fractal_dimension[!is.na(trajectory_stats_df$fractal_dimension)]
  if (length(fd) >= 2) {
    cat("5. FRACTAL DIMENSION:\n")
    cat("   - Mean:", round(mean(fd), 4), "\n")
    cat("   - Value of 1.0 = perfectly straight line\n")
    cat("   - Values closer to 2.0 = more complex, space-filling paths\n")
    cat("   - Aircraft typically show low fractal dimension (efficient paths).\n\n")
  }

  cat("========== END OF ANALYSIS ==========\n")
}
```