# Age pyramid of Erasmus participants by 5-year age group and gender.
#
# Reads the semicolon-delimited file "erasmus.csv", keeps rows with a valid
# age (1-100) and a defined gender, bins the ages into 5-year groups, sums the
# participants per (age group, gender) and draws a horizontal bar chart with
# "Male" on the negative side of the axis and every other gender on the
# positive side.

library(tidyverse)
library(ggplot2)
library(dplyr)

# Load data ----
data <- read_delim("erasmus.csv", delim = ";")

# Columns are renamed by position, so the file must contain exactly these
# 24 columns in this order.
names(data) <- c(
  "Project Reference", "Academic Year", "Mobility Start Month",
  "Mobility End Month", "Mobility Duration", "Activity (mob)",
  "Field of Education", "Participant Nationality", "Education Level",
  "Participant Gender", "Participant Profile", "Special Needs",
  "Fewer Opportunities", "GroupLeader", "Participant Age",
  "Sending Country Code", "Sending City", "Sending Organization",
  "Sending Organisation Erasmus Code", "Receiving Country Code",
  "Receiving City", "Receiving Organization",
  "Receiving Organisation Erasmus Code", "Participants"
)

# Select and clean ----
# Keep only age, gender and participant count. Unfilled ages are stored as
# "-"; as.integer() turns those (and any other non-numeric text) into NA, which
# is the expected outcome here, so the coercion warning is silenced on purpose.
df <- data %>%
  transmute(
    Age = suppressWarnings(as.integer(`Participant Age`)),
    Gender = `Participant Gender`,
    Participants = as.integer(Participants)
  ) %>%
  filter(
    !is.na(Age),             # drop unfilled / unparseable ages
    Age >= 1, Age <= 100,    # drop implausible ages
    Gender != "Undefined"    # drop rows without a defined gender
  )

# Age groups ----
# cut(right = FALSE) builds left-closed bins [0,5), [5,10), ..., [75, Inf), so
# the labels name the inclusive integer ages in each bin and the open-ended
# last bin is labelled "75+".
age_breaks <- seq(0, 75, by = 5)
age_labels <- c(
  paste(head(age_breaks, -1), head(age_breaks, -1) + 4, sep = "-"),
  "75+"
)
df$AgeGroup <- cut(
  df$Age,
  breaks = c(age_breaks, Inf),
  labels = age_labels,
  right = FALSE
)

# Aggregate ----
# Sum the participant counts per age group and gender (rows with a missing
# count contribute 0). count() returns an ungrouped tibble with the total in
# column `n`.
df <- df %>%
  count(AgeGroup, Gender, wt = Participants, name = "n")

if (nrow(df) == 0) {
  stop("No rows left after filtering; nothing to plot.", call. = FALSE)
}

# Plot ----
# Male counts are negated so the bars extend to the opposite side; the axis
# labels show absolute values and the limits are symmetric around zero.
g <- ggplot(
  df,
  aes(
    x = AgeGroup,
    fill = Gender,
    y = ifelse(Gender == "Male", -n, n)
  )
) +
  geom_col() +
  scale_y_continuous(
    labels = function(x) format(abs(x), scientific = FALSE),
    limits = max(df$n) * c(-1, 1)
  ) +
  ylab("Number of Participants") +
  xlab("Age Group") +
  coord_flip()

g