# Erasmus mobility analysis: length of stay by education level.
#
# Reads the semicolon-delimited export "erasmus.csv", keeps the mobility
# duration and education level of every record, tallies the
# (duration, level) combinations and draws one boxplot per education level.

library(tidyverse)
library(dplyr)
library(ggplot2)

# Load data ----

data <- read_delim("erasmus.csv", delim = ";")

# Names are assigned by position, so this assumes the file's columns come in
# exactly this order -- TODO confirm against the export.
names(data) <- c(
  "Project Reference",
  "Academic Year",
  "Mobility Start Month",
  "Mobility End Month",
  "Mobility Duration",
  "Activity (mob)",
  "Field of Education",
  "Participant Nationality",
  "Education Level",
  "Participant Gender",
  "Participant Profile",
  "Special Needs",
  "Fewer Opportunities",
  "GroupLeader",
  "Participant Age",
  "Sending Country Code",
  "Sending City",
  "Sending Organization",
  "Sending Organisation Erasmus Code",
  "Receiving Country Code",
  "Receiving City",
  "Receiving Organization",
  "Receiving Organisation Erasmus Code",
  "Participants"
)
print(data)

# Prepare data ----

df <- data.frame(
  length_of_stay = data$`Mobility Duration`,
  edu_level = data$`Education Level`
)

# Convert the duration column that actually exists in `df`. The previous code
# addressed `Mobility Duration`, which is not a column of `df`, so the
# assignment failed instead of converting anything.
df$length_of_stay <- as.integer(df$length_of_stay)

# Drop incomplete rows, then the placeholder level for unknown education.
df_omit <- na.omit(df)
df2 <- df_omit[df_omit$edu_level != "??? - ? Unknown ?", ]

arranged <- arrange(df2, length_of_stay)
print(arranged)

# Number of records per (duration, level) pair. count() returns an ungrouped
# tibble, so no grouping leaks into later steps.
v <- arranged %>%
  count(length_of_stay, edu_level)
print(v)

# Shorten "ISCED-<k> - <long description>" to "ISCED-<k>" for k = 2..9.
# Matching on the prefix avoids depending on the exact description text
# (which contains typographic apostrophes). Other labels are left untouched.
v$edu_level <- sub("^(ISCED-[2-9]) - .*$", "\\1", v$edu_level)
print(v)

# Plot ----

# One outline colour per level. Naming the vector ties each colour to its
# level, so the plot still renders when a level is absent from the data.
box_colours <- c(
  "ISCED-2" = "green",
  "ISCED-3" = "yellow",
  "ISCED-4" = "purple",
  "ISCED-5" = "red",
  "ISCED-6" = "blue",
  "ISCED-7" = "pink",
  "ISCED-8" = "salmon1",
  "ISCED-9" = "olivedrab"
)

# NOTE(review): `v` has one row per distinct (duration, level) pair, so each
# box summarises the distinct durations of a level, not the individual
# mobilities. Plot `arranged` (after the same relabelling) if the
# distribution over mobilities is what is wanted -- confirm intent.
g <- ggplot(v, aes(x = edu_level, y = length_of_stay, colour = edu_level)) +
  geom_boxplot(varwidth = TRUE) +
  scale_colour_manual(values = box_colours, guide = "none") +
  ylab("Length of Stay") +
  xlab("Education Level")

# Print explicitly so the plot is also rendered when the script is source()d.
print(g)