vhzjK
This commit is contained in:
parent
34da8d0297
commit
dd786495b4
3 changed files with 56 additions and 16 deletions
BIN
.RDataTmp
Normal file
BIN
.RDataTmp
Normal file
Binary file not shown.
25
.Rhistory
25
.Rhistory
|
@ -134,3 +134,28 @@ group_by(annee) %>%
|
|||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
# Bar charts of the per-year title length: mean first, then median.
ggplot(meanLettersByYear, aes(x = annee, y = lettres)) +
  geom_col()
ggplot(medLettersByYear, aes(x = annee, y = lettres)) +
  geom_col()
|
||||
library("jsonlite")
|
||||
library("tidyverse")
|
||||
# Parse the anime database from JSON, then coerce the result to a data frame.
# NOTE(review): absolute, machine-specific path — consider a project-relative
# path so the script runs on other machines.
data <- fromJSON(
  "C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\db_animes\\db_animes.json"
)
dfAnimes <- as.data.frame(data)
|
||||
# Keep only TV entries whose start year is strictly between 1960 and 2021
# (both bounds excluded); OVAs, films, etc. are dropped through media_type.
dfAnimes <- dfAnimes %>%
  filter(
    start_season$year > 1960,
    start_season$year < 2021,
    media_type == "tv"
  )
|
||||
# Promote the two fields of the nested start_season column to top-level
# columns: annee (year) and saison (season).
dfAnimes <- dfAnimes %>%
  mutate(
    annee = start_season$year,
    saison = start_season$season
  )
|
||||
# Make the genre column usable: extract the genre values of one row.
#
# Args:
#   i:   row index to read.
#   df:  data frame to read from. Defaults to the global `dfAnimes`, which is
#        looked up when the function is called, exactly as before.
#   col: position of the genre list-column. Defaults to 13, the index that was
#        previously hard-coded.
#
# Returns the second component of the first element of the selected cell.
# NOTE(review): presumably that component holds the genre names — confirm
# against the JSON schema.
getgenre <- function(i, df = dfAnimes, col = 13) {
  df[i, col][[1]][[2]]
}
|
||||
# Rebuild `genres` as a list-column by calling getgenre() on every row.
# The row count comes from n() rather than the hard-coded 4691, so the step
# keeps working when the filtered data set has a different number of rows.
dfAnimes <- dfAnimes %>%
  mutate(genres = lapply(seq_len(n()), getgenre))
|
||||
# Drop the columns that are not used in the rest of the analysis.
dfAnimes <- dfAnimes %>%
  select(title, mean, rank, annee, saison, num_episodes, source, genres)
|
||||
# Work on a copy and flatten each row's genres into one comma-separated string.
# Calling paste(genres, collapse = ",") on the whole list-column glues every
# row together into a single string that is then repeated on all rows, so each
# cell is collapsed on its own instead.
test <- dfAnimes %>%
  mutate(genres = vapply(genres, paste, character(1), collapse = ","))
|
||||
|
|
47
main.R
47
main.R
|
@ -6,25 +6,37 @@ library("tidyverse")
|
|||
# Parse the anime database from JSON, then coerce the result to a data frame.
# NOTE(review): absolute, machine-specific path — consider a project-relative
# path so the script runs on other machines.
data <- fromJSON(
  "C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\db_animes\\db_animes.json"
)
dfAnimes <- as.data.frame(data)
|
||||
|
||||
# Drop the columns that are not used in the analysis.
# NOTE(review): the filter that follows reads start_season and media_type,
# which are not among the columns kept here — this looks like the pre-change
# side of the diff; confirm before relying on it.
|
||||
dfAnimes <- select(dfAnimes, data.title, data.type, data.episodes, data.status, data.animeSeason, data.tags)
|
||||
# Keep only TV entries whose start year is strictly between 1960 and 2021
# (both bounds excluded); OVAs, films, etc. are dropped through media_type.
dfAnimes <- dfAnimes %>%
  filter(
    start_season$year > 1960,
    start_season$year < 2021,
    media_type == "tv"
  )
|
||||
|
||||
# Split the nested season column into separate year (annee) and season
# (saison) columns.
# NOTE(review): two pairs of mutate() calls follow the single `dfAnimes %>%`
# head, one reading data.animeSeason and one reading start_season; the second
# pair has no pipeline input as written. Presumably these are the removed and
# added sides of the diff — confirm which pair is current.
|
||||
dfAnimes %>%
|
||||
mutate(annee = data.animeSeason$year) %>%
|
||||
mutate(saison = data.animeSeason$season) -> dfAnimes
|
||||
mutate(annee = start_season$year) %>%
|
||||
mutate(saison = start_season$season) -> dfAnimes
|
||||
|
||||
# Make the genre column usable: extract the genre values of one row.
#
# Args:
#   i:   row index to read.
#   df:  data frame to read from. Defaults to the global `dfAnimes`, which is
#        looked up when the function is called, exactly as before.
#   col: position of the genre list-column. Defaults to 13, the index that was
#        previously hard-coded.
#
# Returns the second component of the first element of the selected cell.
# NOTE(review): presumably that component holds the genre names — confirm
# against the JSON schema.
getgenre <- function(i, df = dfAnimes, col = 13) {
  df[i, col][[1]][[2]]
}
|
||||
|
||||
# Remove anime released before 1960 (no TV back then) and after 2021, as well
# as OVAs, films, etc.
# NOTE(review): the three filter() calls store their result in animeCentury,
# while the trailing mutate() stores into dfAnimes and has no pipeline input as
# written. Presumably the filters are the removed side of the diff and the
# mutate() is the added side, fed by `dfAnimes %>%` — confirm.
|
||||
dfAnimes %>%
|
||||
filter(annee > 1960) %>%
|
||||
filter(annee < 2021) %>%
|
||||
filter(data.type == "TV") -> animeCentury
|
||||
# NOTE(review): 1:4691 hard-codes the row count; this only works while the data
# set has exactly 4691 rows — consider seq_len(n()).
mutate(genres = lapply(1:4691, getgenre)) -> dfAnimes
|
||||
|
||||
# Drop the columns that are not used in the rest of the analysis.
dfAnimes <- dfAnimes %>%
  select(title, mean, rank, annee, saison, num_episodes, source, genres)
|
||||
|
||||
# Work on a copy and flatten each row's genres into one comma-separated string.
# Calling paste(genres, collapse = ",") on the whole list-column glues every
# row together into a single string that is then repeated on all rows, so each
# cell is collapsed on its own instead.
test <- dfAnimes %>%
  mutate(genres = vapply(genres, paste, character(1), collapse = ","))
|
||||
|
||||
# Number of anime per year that run longer than 2 cours (estimated at 30
# episodes) => to be improved.
# NOTE(review): two pipeline heads appear back to back (animeCentury with
# data.episodes, then dfAnimes with num_episodes); as written the first chain
# pipes into `dfAnimes`. Presumably these are the removed and added sides of
# the diff — confirm which head is current.
|
||||
animeCentury %>%
|
||||
filter(data.episodes>30) %>%
|
||||
dfAnimes %>%
|
||||
filter(num_episodes>30) %>%
|
||||
group_by(annee) %>%
|
||||
count() %>%
|
||||
rename(nbAnimes = n) -> longbois
|
||||
|
@ -32,11 +44,11 @@ animeCentury %>%
|
|||
# Bar chart of the yearly count of long-running anime.
ggplot(longbois, aes(x = annee, y = nbAnimes)) +
  geom_col()
|
||||
|
||||
# Evolution of the number of letters in titles per year (mean and median).
# NOTE(review): each pipeline shows two heads (animeCentury, then dfAnimes);
# presumably the removed and added sides of the diff — confirm.
|
||||
animeCentury %>%
|
||||
dfAnimes %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
# NOTE(review): this variant stores the per-year summary in dfAnimes, which
# overwrites the data set that the median pipeline below reads, and leaves
# meanLettersByYear (used by the plot) unassigned — looks unintended; confirm.
# NOTE(review): the summarise() calls read data.title, while the select() above
# keeps a column named title — verify the column name.
summarise(lettres=mean(nchar(data.title))) -> dfAnimes
|
||||
|
||||
animeCentury %>%
|
||||
dfAnimes %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
|
||||
|
@ -44,9 +56,12 @@ meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
|||
|
||||
# Bar chart of the median title length per year.
ggplot(medLettersByYear, aes(x = annee, y = lettres)) +
  geom_col()
|
||||
|
||||
# Evolution of the 5 most represented tags.
# NOTE(review): the animeCentury / group_by() chain below ends on a pipe with
# no final step; presumably the removed side of the diff — confirm.
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
# Evolution of the 5 most represented genres.
|
||||
|
||||
# Explode the tags column.
# NOTE(review): `anime` is not defined anywhere in the visible code, and
# start_season is not among the columns kept by the select() above, so both
# mutate() calls look like unfinished work — confirm the intended columns.
|
||||
dfAnimes %>%
|
||||
mutate(annee = anime$year) %>%
|
||||
mutate(saison = start_season$season) -> animes_genres_doubles
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue