Wednesday evening
commit 2ed6382fd8
32 changed files with 1659524 additions and 0 deletions
BIN
.RData
Normal file
Binary file not shown.
136
.Rhistory
Normal file
@@ -0,0 +1,136 @@
library("jsonlite")
data<-fromJSON(file="C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
data<-fromJSON("C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
dfAnimes <- as.data.frame(data)
library("tidyverse")
install.packages("tidyverse")
library("tidyverse")
dfAnimes
#Cleaning up the unused columns
dfAnimes <- select(dfAnimes, data.title, data.type, data.episodes, data.status, data.animeSeason, data.tags)
#Number of anime running for more than 2 cours (24 episodes) per year
dfAnimes %>%
filter(data.episodes>25) %>% longbois
#Number of anime running for more than 2 cours (24 episodes) per year
dfAnimes %>%
filter(data.episodes>25) -> longbois
#Number of anime running for more than 2 cours (24 episodes) per year
dfAnimes %>%
filter(data.episodes>30) -> longbois
#Number of anime running for more than 2 cours (estimated at 30 episodes) per year
dfAnimes %>%
filter(data.episodes>30) %>%
group_by(data.animeSeason) %>%
count()-> longbois
#Number of anime running for more than 2 cours (estimated at 30 episodes) per year
dfAnimes %>%
filter(data.episodes>30) %>%
group_by(data.animeSeason) %>%
count(nbAnimes)-> longbois
#Number of anime running for more than 2 cours (estimated at 30 episodes) per year
dfAnimes %>%
filter(data.episodes>30) %>%
group_by(data.animeSeason) %>%
count() %>%
rename(n = nbAnimes) -> longbois
#Number of anime running for more than 2 cours (estimated at 30 episodes) per year
dfAnimes %>%
filter(data.episodes>30) %>%
group_by(data.animeSeason) %>%
count() %>%
rename(nbAnimes = n) -> longbois
longbois %>% ggplot(aes(data, nbAnimes)) + ggcol()
longbois %>% ggplot(aes(data, nbAnimes)) + ggcols()
longbois %>% ggplot(aes(data, nbAnimes)) + geom_cols()
longbois %>% ggplot(aes(data, nbAnimes)) + geom_col()
longbois %>% ggplot(aes(data.animeSeason, nbAnimes)) + geom_col()
longbois %>% ggplot(aes(data.animeSeason$year, nbAnimes)) + geom_col()
#Evolution of the number of letters (mean and median) in titles per year
dfAnimes %>%
group_by(data.animeSeason$year) %>%
summarise(Lettres=mean(nchar(data.title))) -> meanLettersByYear
View(meanLettersByYear)
#Evolution of the number of letters (mean and median) in titles per year
dfAnimes %>%
group_by(data.animeSeason$year) %>%
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
dfAnimes %>%
group_by(data.animeSeason$year) %>%
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
dfAnimes %>%
group_by(data.animeSeason$year) %>%
mutate(lettres=median(nchar(data.title))) -> medLettersByYear
dfAnimes %>%
group_by(data.animeSeason$year) %>%
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
dfAnimes %>%
group_by(data.animeSeason$year) %>%
summarise(lettres=median(nchar(data.title))) %>%
rename(annee = data.animeSeason$year)-> medLettersByYear
#Splitting the season column in two
dfAnimes %>%
mutate(annee = data.animeSeason$year) %>%
mutate(saison = data.animeSeason$season) -> dfAnimes
dfAnimes %>%
group_by(annee) %>%
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
View(medLettersByYear)
View(dfAnimes)
View(dfAnimes)
#Evolution of the number of letters (mean and median) in titles per year
dfAnimes %>%
group_by(annee) %>%
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
View(meanLettersByYear)
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
dfAnimes %>%
group_by(annee) %>%
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
filter(data, data.animeSeason$year > 1924) -> post1924
filter(dfAnimes, dfAnimes.animeSeason$year > 1924) -> post1924
dfAnimes
filter(dfAnimes, dfAnimes$annee > 1924) -> post1924
filter(post1924,post1924$annee < 2022) -> animeCentury
animeCentury
View(dfAnimes)
View(dfAnimes)
#Filtering out anime released before 1925 or after 2021, plus OVAs, movies, etc.
dfAnimes %>%
filter(annee < 1924) %>%
filter(annee > 2021) %>%
filter(data.type == "TV") -> animeCentury
#Filtering out anime released before 1925 or after 2021, plus OVAs, movies, etc.
dfAnimes %>%
filter(annee < 1924) %>%
filter(annee > 2021) -> animeCentury
#Filtering out anime released before 1925 or after 2021, plus OVAs, movies, etc.
dfAnimes %>%
filter(annee > 1924) %>%
filter(annee < 2021) %>%
filter(data.type == "TV") -> animeCentury
View(animeCentury)
#Number of anime running for more than 2 cours (estimated at 30 episodes) per year => to be improved
animeCentury %>%
filter(data.episodes>30) %>%
group_by(annee) %>%
count() %>%
rename(nbAnimes = n) -> longbois
longbois %>% ggplot(aes(annee, nbAnimes)) + geom_col()
#Filtering out anime released before 1925 or after 2021, plus OVAs, movies, etc.
dfAnimes %>%
filter(annee > 1960) %>%
filter(annee < 2021) %>%
filter(data.type == "TV") -> animeCentury
#Evolution of the number of letters (mean and median) in titles per year
animeCentury %>%
group_by(annee) %>%
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
animeCentury %>%
group_by(annee) %>%
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
66
anime-offline-database-master/.github/CONTRIBUTING.md
vendored
Normal file
@@ -0,0 +1,66 @@
# Contribution guidelines
Please read the FAQ down below.

## Possible errors / problems in the database
If you find something that, in your opinion, could be the result of incorrectly extracted data, please submit an issue rather than creating a pull request, because the database is created by an automated process.

## Adding your project to the list of projects using this database
In case you have a project that uses this database and you want to add it to the list of projects that are using this database, create a pull request adding it to the table. Do not create an issue asking me or anyone else to add it.

+ You have to be the author/maintainer of the project that you want to add
+ Create a PR in which you add it to the table in the README.md
+ Do not change/alter anything else
+ Your project has to use this database
+ You have to have a link back to this project in the README.md of your project
+ The README.md of your project has to be in English or it must have an English translation
+ Your project has to be hosted either on GitHub or GitLab
+ The table is sorted by project name (ascending). Add your entry accordingly.
+ Project name must match the repository name and link directly to the source code (not a project page such as YOURNAME.github.io)
+ Put your name under _Author/Maintainer_ with a link to your profile.
+ Add a meaningful description in English. The description must not be longer than 150 characters.

# FAQ

## What do you mean by 'meta data provider'?
Websites which provide information about anime, such as `myanimelist.net`, `notify.moe`, ...

## Can you please add additional data/properties?
No. The dataset has been created for my own tool. It contains all data/properties that I need and I won't add more data/properties. This is merely an index. The idea is to visit the meta data provider of your choice to get additional information about the anime.

## Can you please add an additional meta data provider?
No. I don't plan to add any additional meta data provider.

## Can you please change the structure of the file?
No. The file has the structure that it needs to have for the purpose it has been built for.

## There are duplicates in the dataset.
If the entry of one meta data provider is not merged with an entry of a different meta data provider, although they are practically the same entry, then this is **not a duplicate**.
They are simply not merged together. This can happen and it is intentional. Since this dataset is created automatically, two entries should rather not be merged than falsely merged together.
If you query this dataset based on titles/synonyms, it might seem that there are duplicates. However, the intended usage is to query by the URL of the meta data provider. This way you will always retrieve the entry that you want. Entries being merged together is just a nice-to-have.

A duplicate, by definition of this dataset, is an entry which contains multiple links of the same meta data provider in `sources`.

## Why are there no IDs?
There are. The entries under `sources` are the IDs. Each one of the array's URLs is a key for that specific entry.
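For illustration, a minimal R sketch of such a lookup by `sources` URL (a sketch only; it assumes `jsonlite` and a local copy of the database file, and any JSON library works the same way):

```
library(jsonlite)

db <- fromJSON("anime-offline-database.json")  # local copy of the database
entries <- db$data                             # one row per anime, `sources` is a list column

# One of the URLs in `sources` serves as the key for the entry.
key <- "https://myanimelist.net/anime/1535"
hit <- entries[sapply(entries$sources, function(s) key %in% s), ]
hit$title                                      # title of the matching entry
```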
## Is this dataset created automatically or manually?
It is created automatically and reviewed in a half-automated process.

## Do you plan to open source the code which creates this dataset?
Yes. Parts of the code are already [available](https://github.com/manami-project?tab=repositories&q=modb&type=source). However, there is still work to do before I can/want to open source the rest, and that doesn't have any priority right now.

## How do you split entries?
Entries are split if one meta data provider lists multiple entries as one and others don't.
**Example:**
* The entry of a meta data provider which lists 3 movies as one entry is split from three separate entries of another meta data provider
* A series is listed as one entry having 26 episodes on one meta data provider and as two entries having 13 episodes each on the other meta data provider

However, if one entry is listed with 13 episodes whereas the other is listed with 12 because it doesn't count the recap episode, then these entries are still merged together.

## Can I somehow contribute?
Currently I can't think of a way. But you can check the [predefined issue templates](https://github.com/manami-project/anime-offline-database/issues/new/choose) in case you want to report something that fits one of the available cases.

## Does this dataset contain all anime from the supported meta data providers?
No. MAL and anisearch are the only providers which list adult titles publicly. So this type of anime is missing for the other meta data providers.
If there are new entries which have been created after an update, then those obviously won't appear until the next update.
Apart from that, it should contain all titles from the supported meta data providers.
8
anime-offline-database-master/.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,8 @@
blank_issues_enabled: false
contact_links:
  - name: Guide to add your project to the project list.
    url: https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#adding-your-project-to-the-list-of-projects-using-this-database
    about: How to add your project to the list of projects using this database.
  - name: FAQ
    url: https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq
    about: Frequently Asked Questions
51
anime-offline-database-master/.github/ISSUE_TEMPLATE/falsely-merged-entries.md
vendored
Normal file
@@ -0,0 +1,51 @@
---
name: Falsely merged entry
about: Entries have been merged together although they should be separate entries?
title: ''
labels: ''
assignees: manami-project

---

Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.
Especially the sections on [duplicates](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#there-are-duplicates-in-the-data-set) and [splits](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#how-do-you-split-entries). Please refrain from creating issues stating that entries should be merged together. This is only for _splitting_ entries which have already been merged together, but should be separated.
**Only one entry per issue**

## Which entry should be split? (original from data set)

**Example:**
```
"https://anidb.net/anime/9466",
"https://anilist.co/anime/15809",
"https://anime-planet.com/anime/the-devil-is-a-part-timer",
"https://kitsu.io/anime/7314",
"https://myanimelist.net/anime/15809",
"https://notify.moe/anime/CGnFpKimR"
"https://anidb.net/anime/16104",
"https://anilist.co/anime/130592",
"https://anime-planet.com/anime/the-devil-is-a-part-timer-2",
"https://kitsu.io/anime/44113",
"https://myanimelist.net/anime/48413",
"https://notify.moe/anime/Zy3-TV8MR"
```

## How should it be split?

**Example:**
```
"https://anidb.net/anime/9466",
"https://anilist.co/anime/15809",
"https://anime-planet.com/anime/the-devil-is-a-part-timer",
"https://kitsu.io/anime/7314",
"https://myanimelist.net/anime/15809",
"https://notify.moe/anime/CGnFpKimR"
```

```
"https://anidb.net/anime/16104",
"https://anilist.co/anime/130592",
"https://anime-planet.com/anime/the-devil-is-a-part-timer-2",
"https://kitsu.io/anime/44113",
"https://myanimelist.net/anime/48413",
"https://notify.moe/anime/Zy3-TV8MR"
```
19
anime-offline-database-master/.github/ISSUE_TEMPLATE/problem-in-data-extraction.md
vendored
Normal file
@@ -0,0 +1,19 @@
---
name: Problem in data extraction
about: Is there a problem in the data extraction?
title: ''
labels: ''
assignees: manami-project

---

Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.

* Which data is not extracted correctly? (e.g. title, episodes...)

* Can you provide an example entry?

* Which value is expected?
10
anime-offline-database-master/.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
@@ -0,0 +1,10 @@
---
name: Question
about: You have a question which was not covered by the FAQ?
title: ''
labels: question
assignees: manami-project

---

Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.
37
anime-offline-database-master/.github/workflows/json_lint.yml
vendored
Normal file
@@ -0,0 +1,37 @@
name: Check JSON files

on:
  push:
    branches:
      - '**'
    paths-ignore:
      - 'README.md'
      - '.gitignore'
      - '.gitattributes'
      - '.github/**/*'

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Setup node environment
        uses: actions/setup-node@v1
        with:
          node-version: '14'
      - name: Install jsonlint
        run: npm install jsonlint -g
      - name: Check anime-offline-database.json
        run: jsonlint -q anime-offline-database.json
      - name: Check anime-offline-database-minified.json
        run: jsonlint -q anime-offline-database-minified.json
      - name: Check dead-entries for anidb
        run: jsonlint -q dead-entries/anidb.json
      - name: Check dead-entries for anilist
        run: jsonlint -q dead-entries/anilist.json
      - name: Check dead-entries for kitsu
        run: jsonlint -q dead-entries/kitsu.json
      - name: Check dead-entries for livechart
        run: jsonlint -q dead-entries/livechart.json
      - name: Check dead-entries for myanimelist
        run: jsonlint -q dead-entries/myanimelist.json
29
anime-offline-database-master/.gitignore
vendored
Normal file
@@ -0,0 +1,29 @@
/*

!.gitignore
!README.md
!anime-offline-database.json
!anime-offline-database-minified.json

!.github/
.github/*
!.github/CONTRIBUTING.md

!.github/workflows/
.github/workflows/*
!.github/workflows/json_lint.yml

!.github/ISSUE_TEMPLATE/
.github/ISSUE_TEMPLATE/*
!.github/ISSUE_TEMPLATE/problem-in-data-extraction.md
!.github/ISSUE_TEMPLATE/question.md
!.github/ISSUE_TEMPLATE/falsely-merged-entries.md
!.github/ISSUE_TEMPLATE/config.yml

!dead-entries/
dead-entries/*
!dead-entries/anidb.json
!dead-entries/anilist.json
!dead-entries/kitsu.json
!dead-entries/myanimelist.json
!dead-entries/livechart.json
229
anime-offline-database-master/README.md
Normal file
@@ -0,0 +1,229 @@
[![Check JSON files](https://github.com/manami-project/anime-offline-database/workflows/Check%20JSON%20files/badge.svg)](https://github.com/manami-project/anime-offline-database/actions?query=workflow%3A%22Check+JSON+files%22)
# anime-offline-database
The purpose of this repository is to create an offline database containing anime meta data aggregated by different anime meta data providers (such as myanimelist.net, anidb.net, kitsu.io and more) and allow cross references between those meta data providers. This file is supposed to be used by and created for [manami](https://github.com/manami-project/manami).

**The goal is to deliver at least weekly updates.**

## Statistics
Update **week 48 [2021]**

The database consists of **33043** entries composed of:
+ 23233 entries from myanimelist.net
+ 18215 entries from anime-planet.com
+ 17231 entries from kitsu.io
+ 16208 entries from anisearch.com
+ 15526 entries from anilist.co
+ 15175 entries from notify.moe
+ 12127 entries from anidb.net
+ 9562 entries from livechart.me

Missed updates:
+ **2021:** 0 _(so far)_
+ **2020:** 0
+ **2019:** 2
+ **2018:** 1

## Structure
This repository contains various JSON files: the database file itself as well as one file containing IDs of dead entries for each meta data provider to support the automated process.

### anime-offline-database-minified.json

Minified version of `anime-offline-database.json` which contains the same data, but is smaller in size.

### anime-offline-database.json

#### Data types

**Root**
| Field | Type | Nullable |
| --- | --- | --- |
| data | ```Anime[]``` | no |

**Anime**
| Field | Type | Nullable |
| --- | --- | --- |
| sources | ```URL[]``` | no |
| title | ```String``` | no |
| type | ```Enum of [TV, MOVIE, OVA, ONA, SPECIAL, UNKNOWN]``` | no |
| episodes | ```Integer``` | no |
| status | ```Enum of [FINISHED, ONGOING, UPCOMING, UNKNOWN]``` | no |
| animeSeason | ```AnimeSeason``` | no |
| picture | ```URL``` | no |
| thumbnail | ```URL``` | no |
| synonyms | ```String[]``` | no |
| relations | ```URL[]``` | no |
| tags | ```String[]``` | no |

**AnimeSeason**
| Field | Type | Nullable |
| --- | --- | --- |
| season | ```Enum of [SPRING, SUMMER, FALL, WINTER, UNDEFINED]``` | no |
| year | ```Integer``` | yes |

#### Example:

```json
{
  "data": [
    {
      "sources": [
        "https://anidb.net/anime/4563",
        "https://anilist.co/anime/1535",
        "https://anime-planet.com/anime/death-note",
        "https://anisearch.com/anime/3633",
        "https://kitsu.io/anime/1376",
        "https://livechart.me/anime/3437",
        "https://myanimelist.net/anime/1535",
        "https://notify.moe/anime/0-A-5Fimg"
      ],
      "title": "Death Note",
      "type": "TV",
      "episodes": 37,
      "status": "FINISHED",
      "animeSeason": {
        "season": "FALL",
        "year": 2006
      },
      "picture": "https://cdn.myanimelist.net/images/anime/9/9453.jpg",
      "thumbnail": "https://cdn.myanimelist.net/images/anime/9/9453t.jpg",
      "synonyms": [
        "Bilježnica smrti",
        "Caderno da Morte",
        "Carnet de la Mort",
        "DEATH NOTE",
        "DN",
        "Death Note - A halállista",
        "Death Note - Carnetul morţii",
        "Death Note - Zápisník smrti",
        "Mirties Užrašai",
        "Notatnik śmierci",
        "Notes Śmierci",
        "Quaderno della Morte",
        "Sveska Smrti",
        "Ölüm Defteri",
        "Τετράδιο Θανάτου",
        "Бележник на Смъртта",
        "Записник Смерті",
        "Свеска Смрти",
        "Тетрадка на Смъртта",
        "Тетрадь cмерти",
        "Үхлийн Тэмдэглэл",
        "מחברת המוות",
        "دفترچه مرگ",
        "دفترچه یادداشت مرگ",
        "كـتـاب الـموت",
        "مدونة الموت",
        "مذكرة الموت",
        "موت نوٹ",
        "डेथ नोट",
        "ですのーと",
        "デスノート",
        "死亡笔记",
        "데스노트"
      ],
      "relations": [
        "https://anidb.net/anime/8146",
        "https://anidb.net/anime/8147",
        "https://anilist.co/anime/2994",
        "https://anime-planet.com/anime/death-note-rewrite-1-visions-of-a-god",
        "https://anime-planet.com/anime/death-note-rewrite-2-ls-successors",
        "https://anisearch.com/anime/4441",
        "https://anisearch.com/anime/5194",
        "https://kitsu.io/anime/2707",
        "https://livechart.me/anime/3808",
        "https://myanimelist.net/anime/2994",
        "https://notify.moe/anime/DBBU5Kimg"
      ],
      "tags": [
        "alternative present",
        "amnesia",
        "anti-hero",
        "asexual",
        "asia",
        "based on a manga",
        "contemporary fantasy",
        "cops",
        "crime",
        "crime fiction",
        "criminals",
        "detective",
        "detectives",
        "drama",
        "earth",
        "espionage",
        "fantasy",
        "genius",
        "gods",
        "hero of strong character",
        "horror",
        "japan",
        "kamis",
        "kuudere",
        "male protagonist",
        "manga",
        "mind games",
        "mystery",
        "overpowered main characters",
        "philosophy",
        "plot continuity",
        "police",
        "policeman",
        "present",
        "primarily adult cast",
        "primarily male cast",
        "psychological",
        "psychological drama",
        "psychopaths",
        "revenge",
        "rivalries",
        "secret identity",
        "serial killers",
        "shinigami",
        "shounen",
        "supernatural",
        "supernatural drama",
        "thriller",
        "time skip",
        "tragedy",
        "twisted story",
        "university",
        "urban",
        "urban fantasy",
        "vigilantes"
      ]
    }
  ]
}
```
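For illustration, a minimal sketch of how this structure loads in R (a sketch only, using `jsonlite` as the analysis scripts in this commit do; any JSON library works): with jsonlite's default simplification, `data` becomes a data frame and `animeSeason` a nested data frame with `season` and `year` columns.

```r
library(jsonlite)

db <- fromJSON("anime-offline-database.json")  # local copy of the database
animes <- db$data

str(animes$animeSeason)        # data frame with columns `season` and `year`
head(animes$animeSeason$year)  # release year per entry; may be NA since `year` is nullable
```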
### dead-entries
Contains IDs which have been removed from the database of the corresponding meta data provider.

#### Data types

| Field | Type | Nullable |
| --- | --- | --- |
| deadEntries | ```String[]``` | no |

#### Example

```json
{
  "deadEntries": [
    "38492",
    "38518",
    "38522",
    "38531"
  ]
}
```

## Other projects using this database
If you have a project that uses this database and you want to add it to this list, please read the [contribution guidelines](./.github/CONTRIBUTING.md) first.

|Project|Author/Maintainer|Short description|
|----|----|----|
|[adb-zeppelin-statistics](https://github.com/manami-project/adb-zeppelin-statistics)|[manami-project](https://github.com/manami-project)|A set of statistics and insights about anime on MAL.|
|[animanga-wordlist](https://github.com/ryuuganime/animanga-wordlist)|[ryuuganime](https://github.com/ryuuganime)|Japanese Anime, Manga, Characters, and Studio Word List/Dictionary|
|[arm-server](https://github.com/BeeeQueue/arm-server)|[BeeeQueue](https://github.com/BeeeQueue)|A REST API for querying this database.|
|[manami](https://github.com/manami-project/manami)|[manami-project](https://github.com/manami-project)|A tool to catalog anime on your hard drive and discover new anime to watch.|
File diff suppressed because one or more lines are too long
1442843
anime-offline-database-master/anime-offline-database.json
Normal file
File diff suppressed because it is too large
4532
anime-offline-database-master/dead-entries/anidb.json
Normal file
File diff suppressed because it is too large
126697
anime-offline-database-master/dead-entries/anilist.json
Normal file
File diff suppressed because it is too large
28317
anime-offline-database-master/dead-entries/kitsu.json
Normal file
File diff suppressed because it is too large
1356
anime-offline-database-master/dead-entries/livechart.json
Normal file
File diff suppressed because it is too large
27259
anime-offline-database-master/dead-entries/myanimelist.json
Normal file
File diff suppressed because it is too large
52
main.R
Normal file
@@ -0,0 +1,52 @@
library("jsonlite")
library("tidyverse")



data<-fromJSON("C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
dfAnimes <- as.data.frame(data)

#Cleaning up the unused columns
dfAnimes <- select(dfAnimes, data.title, data.type, data.episodes, data.status, data.animeSeason, data.tags)

#Splitting the season column in two
dfAnimes %>%
  mutate(annee = data.animeSeason$year) %>%
  mutate(saison = data.animeSeason$season) -> dfAnimes

#Filtering out anime released before 1960 (no TV back then :() or after 2021, plus OVAs, movies, etc.
dfAnimes %>%
  filter(annee > 1960) %>%
  filter(annee < 2021) %>%
  filter(data.type == "TV") -> animeCentury



#Number of anime running for more than 2 cours (estimated at 30 episodes) per year => to be improved
animeCentury %>%
  filter(data.episodes>30) %>%
  group_by(annee) %>%
  count() %>%
  rename(nbAnimes = n) -> longbois

longbois %>% ggplot(aes(annee, nbAnimes)) + geom_col()

#Evolution of the number of letters (mean and median) in titles per year
animeCentury %>%
  group_by(annee) %>%
  summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear

animeCentury %>%
  group_by(annee) %>%
  summarise(lettres=median(nchar(data.title))) -> medLettersByYear

meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()

medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()

#Evolution of the 5 most represented tags
animeCentury %>%
  group_by(annee) %>%
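
The last step above is left unfinished in main.R; one possible way to complete it (a sketch only, assuming `data.tags` is a list-column of tag vectors, the tidyverse is loaded, and `top5Tags` is an illustrative name):

# Pick the 5 most frequent tags over the whole filtered period...
animeCentury %>%
  select(annee, data.tags) %>%
  unnest(cols = data.tags) %>%      # one row per (anime, tag)
  count(data.tags, sort = TRUE) %>%
  slice_head(n = 5) %>%
  pull(data.tags) -> top5Tags

# ...then count them per year and plot their evolution.
animeCentury %>%
  select(annee, data.tags) %>%
  unnest(cols = data.tags) %>%
  filter(data.tags %in% top5Tags) %>%
  count(annee, data.tags) %>%
  ggplot(aes(annee, n, color = data.tags)) + geom_line()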
BIN
tp-analyse-explorative-master.zip
Normal file
Binary file not shown.
35
tp-analyse-explorative-master/tp-analyse-explorative/.gitignore
vendored
Normal file
@@ -0,0 +1,35 @@
# ---> R
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md
Binary file not shown.
@@ -0,0 +1,2 @@
# tp-analyse-explorative
125
tp-analyse-explorative-master/tp-analyse-explorative/exo1.R
Normal file
@@ -0,0 +1,125 @@
### 1 Vectors ###

### 1.1 Let's start simple
e1 = c(2,5,0,8)
e2 = 1:200
e3 = seq(-210,-200,2)
e4 = 2^1:7
v = rep(c(1,-1),times=25)
e5 = c(e2,e3)
?seq
e6 = seq(0,1,length=70)
e7 = rep(e1,times=10)
e2-e3

### 1.2 Character vectors
# All vowels
vowels = c('a','e','i','o','u','y')
# All letters
letters = letters
# True or false if vowels are in letters
letters %in% vowels
# Index of each vowel
vowelsInLetters = which(letters %in% vowels)
# Index of each non-vowel
notVowelsInLetters = which(!(letters %in% vowels))
# Letters after vowels
lettersAfterVowels = letters[vowelsInLetters+1]
# myname with my name
myname = "Titouan"
# strsplit to extract letters
mynameSplited = strsplit(myname, NULL)
# Access its first element
mynameSplited[[1]][1]
mynameSplited[[1]]
mynameSplited[1]
# Index in alphabet of letters in my name
indexMyname = which(letters %in% mynameSplited[[1]])
# Index in alphabet of letters in my neighbour's name
neighborname = "Marianne"
neighbornameSplited = strsplit(neighborname, NULL)
neighbornameSplited[[1]][1]
indexNeighborname = which(letters %in% neighbornameSplited[[1]])
# Min of average of index
mynameAverage = mean(indexMyname)
neighborAverage = mean(indexNeighborname)
minIndexOfName = min(c(mynameAverage,neighborAverage))

### 2 DataFrames ###

### 2.1 Cute Animals
# All vowels
vowels = c('a','e','i','o','u','y')
# All letters
letters = letters
# Database of 3 columns: alphabet letters, number of each letter, binary variable vowel
database = data.frame(letter=letters, index=1:26, isVowels=letters %in% vowels)
# Extracting lines corresponding to my name
indexMyname = letters %in% strsplit("Titouan", NULL)[[1]]
database[indexMyname,"index"]
# Examining msleep dataset
library(tidyverse)
head(msleep)
str(msleep)
names(msleep)
summary
# Sanity check: animals are either awake or asleep
which(msleep$sleep_total + msleep$awake == 24 )
# Animal that sleeps the most
msleep[which.max(msleep$sleep_total),]
# Animals under 100 g that sleep more than half a day
nrow(msleep[which(msleep$sleep_total > 12 & msleep$bodywt < 0.1),])
# Average brainwt/bodywt ratio (adds a ratio column)
msleep$ratio = msleep$brainwt/msleep$bodywt
mean(msleep$ratio, na.rm = TRUE)
# Animal with the highest ratio
msleep[which.max(msleep$ratio),]

### 2.2 Endangered animals

# Create a copy and reorder its factors
msleep_copy = msleep
msleep_copy$conservation = factor(x = msleep_copy$conservation, c("lc","domesticated","cd","nt","vu","en"), ordered = TRUE)
# Compare average weight of endangered animals to non-endangered
averageWtThreatened = mean(msleep_copy[which(msleep_copy$conservation > "nt"),]$bodywt)
averageWtRemaining = mean(msleep_copy[which(msleep_copy$conservation <= "nt"),]$bodywt)

# Add a boolean threatened column
msleep$threatened = msleep$conservation > "nt"


### 2.3 Functions

# Create a function taking a name as input and returning the corresponding list of letters
lettersFromName <- function(name) {
  return(strsplit(name, NULL)[[1]])
}
lettersFromName("Titouan")
# Fix with empty "" name
lettersFromName("")
lettersFromNameFixed <- function(name) {
  if (name == "") {
    l = NULL
  }
  else {
    l = strsplit(name, NULL)[[1]]
  }
  return(l)
}
lettersFromNameFixed("")
# With non-standard characters
lettersFromNameFixed("X Æ A-12")
# Function returning the corresponding genus from an animal name
genusFromName <- function(name) {
  genusname <- msleep[tolower(msleep$name) == tolower(name),]$genus
  if (length(genusname) == 0) {
    s <- c("I don't know")
  }
  else {
    s <- c("The ",name," is a ", genusname)
  }
  #browser()
  return(paste(s, collapse = ''))
}
genusFromName("little brown bat")
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo1.pdf
Normal file
Binary file not shown.
315
tp-analyse-explorative-master/tp-analyse-explorative/exo2.R
Normal file
@@ -0,0 +1,315 @@
library(tidyverse)
library(directlabels)

####### 1 Plot and Given names #######
### Exploring the Pink City
# read the table prenoms.csv
prenoms <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenoms.csv',sep=";")
# Inspect it
head(prenoms)
# Plot the number of births by year
prenoms %>%
  group_by(Année) %>%
  summarise(Naissances=sum(Nombre)) %>%
  ggplot(aes(x=Année,y=Naissances)) +
  geom_point()
# Plot the number of male/female births by year
prenoms %>%
  group_by(Année,Sexe) %>%
  summarise(Naissances=sum(Nombre)) %>%
  ggplot(aes(x=Année,y=Naissances)) +
  geom_point(aes(color=Sexe))
# Is my name in the dataset?
prenoms[tolower(prenoms$Prénom) == tolower("Titouan"),]
# Represent the 10 most given names
prenoms %>%
  group_by(Prénom) %>%
  summarise(Naissances=sum(Nombre)) %>%
  arrange(desc(Naissances)) -> mostGivenNames
ggplot(head(mostGivenNames,10),aes(x=Prénom,y=Naissances)) +
  geom_bar(stat='Identity',aes(fill=Prénom))
# Select for each year the top 5 given names by sex and their evolution through the years
prenoms[prenoms$Sexe == "M",] %>%
  group_by(Année) %>%
  slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
prenoms[prenoms$Sexe == "F",] %>%
  group_by(Année) %>%
  slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
ggplot(mostGivenNamesBySexeAndYear,aes(x=Année,y=Nombre)) +
  geom_point(aes(color=Prénom)) +
  facet_wrap("Prénom")
# Plot the average number of letters by year
prenoms %>%
  group_by(Année) %>%
  summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
  geom_line()
# Plot the average number of vowels/consonants by year
prenoms %>%
  group_by(Année) %>%
  summarise(
    Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
    Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
  ) -> averageNumberOfVowelsAndConsonantsByYear
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
  geom_point()
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
  geom_point()
# How the number of composed names changes through the years
prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
  group_by(Année) %>%
  summarise(nb_composedNames=sum(Nombre)) -> composedNamesByYear
ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
  geom_line()
# Defining a "hype" criterion and finding the hypest names
prenoms %>%
  group_by(Année) -> namesByYear
namesByYear$nbAnnéePrec <- lag(namesByYear$Nombre)
namesByYear$difference <- namesByYear$Nombre - namesByYear$nbAnnéePrec

namesByYear %>%
  group_by(Année) %>%
  slice_max(order_by = difference, n = 1) -> hypestNameByYear
ggplot(hypestNameByYear,aes(x=factor(Année),y=difference)) +
  geom_text(aes(label=Prénom))

### Exploring the Gray City
# read the table prenomsParis.csv
prenomsParis <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenomsParis.csv',sep=";")
prenomsParis <- rename(prenomsParis, Année = Annee)
prenomsParis <- rename(prenomsParis, Prénom = Prenoms)

nameStudy <- function(prenoms) {
  # Plot the number of births by year
  prenoms %>%
    group_by(Année) %>%
    summarise(Naissances=sum(Nombre)) %>%
    ggplot(aes(x=Année,y=Naissances)) +
    ggtitle("Naissances par année") +
    geom_point() -> p1
  # Plot the number of male/female births by year
  prenoms %>%
    group_by(Année,Sexe) %>%
    summarise(Naissances=sum(Nombre)) %>%
    ggplot(aes(x=Année,y=Naissances)) +
    ggtitle("Sexe des naissances par année") +
    geom_point(aes(color=Sexe)) -> p2
  # Is my name in the dataset?
  prenoms[tolower(prenoms$Prénom) == tolower("Titouan"),]
  # Represent the 10 most given names
  prenoms %>%
    group_by(Prénom) %>%
    summarise(Naissances=sum(Nombre)) %>%
    arrange(desc(Naissances)) -> mostGivenNames
  ggplot(head(mostGivenNames,10),aes(x=reorder(Prénom,Naissances),y=Naissances)) +
    ggtitle("Les 10 noms les plus donnés") +
    geom_bar(stat='Identity',aes(fill=reorder(Prénom,Naissances))) -> p3
  # Select for each year the top 5 given names by sex and their evolution through the years
  prenoms[prenoms$Sexe == "M",] %>%
    group_by(Année) %>%
    slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
  prenoms[prenoms$Sexe == "F",] %>%
    group_by(Année) %>%
    slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
  mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
  ggplot(mostGivenNamesBySexeAndYear,aes(x=Année,y=Nombre)) +
    ggtitle("Top 5 des noms donnés, par sexe et par an") +
    geom_point(aes(color=Prénom)) +
    facet_wrap("Prénom") -> p4
  # Plot the average number of letters by year
  prenoms %>%
    group_by(Année) %>%
    summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
  ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
    ggtitle("Nombre moyen de lettres dans les prénoms par année") +
    geom_line() -> p5
  # Plot the average number of vowels/consonants by year
  prenoms %>%
    group_by(Année) %>%
    summarise(
      Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
      Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
    ) -> averageNumberOfVowelsAndConsonantsByYear
  ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
    ggtitle("Nombre moyen de voyelles dans les prénoms par année") +
    geom_point() -> p6
  ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
    ggtitle("Nombre moyen de consonnes dans les prénoms par année") +
    geom_point() -> p7
  # How the number of composed names changes through the years
  prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
    group_by(Année) %>%
    summarise(nb_composedNames=sum(Nombre)) -> composedNamesByYear
  ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
    ggtitle("Nombre de prénoms composés par année") +
    geom_line() -> p8
  # Defining a "hype" criterion and finding the hypest names
  prenoms %>%
    group_by(Année) -> namesByYear
  namesByYear$nbAnnéePrec <- lag(namesByYear$Nombre)
  namesByYear$difference <- namesByYear$Nombre - namesByYear$nbAnnéePrec
  namesByYear %>%
    group_by(Année) %>%
    slice_max(order_by = difference, n = 1) -> hypestNameByYear
  ggplot(hypestNameByYear,aes(x=factor(Année),y=difference)) +
    ggtitle("Prénom le plus hype par année") +
    geom_text(aes(label=Prénom)) -> p9
  return(list(p1,p2,p3,p4,p5,p6,p7,p8,p9))
}
plotsParis <- nameStudy(prenomsParis)
plotsParis

# A tale of two (or more) cities

prenoms <- subset( prenoms, select = -Ordre )
prenoms$Ville <- "Toulouse"
prenomsParis$Ville <- "Paris"
allPrenoms <- rbind(prenoms,prenomsParis)
allPrenoms %>%
  group_by(Année,Ville) %>%
  mutate(NaissancesVilleAnnée=sum(Nombre)) -> allPrenoms

nameStudyCombined <- function(prenoms) {
  # Plot the number of births by year
  prenoms %>%
    group_by(Année,Ville) %>%
    summarise(Naissances=sum(Nombre)) %>%
    ggplot(aes(x=Année,y=Naissances)) +
    geom_point(aes(color=Ville)) +
    geom_line(aes(color=Ville)) +
    ggtitle("Naissances par année") -> p1

  prenoms %>%
    group_by(Année,Ville) %>%
    mutate(NaissancesVilleAnnée=sum(Nombre)) -> prenoms

  # Plot the number of male/female births by year and by city
  prenoms %>%
    group_by(Année,Sexe,Ville) %>%
    summarise(Naissances=100*sum(Nombre)/NaissancesVilleAnnée) %>%
    ggplot(aes(x=Année,y=Naissances)) +
    geom_point(aes(color=Sexe)) +
    facet_wrap("Ville") +
    ggtitle("Sexe des naissances par année en %") -> p2

  # Represent the 10 most given names by city
  prenoms %>%
    group_by(Ville) %>%
    mutate(NaissancesVille = sum(Nombre)) %>%
    group_by(Prénom,Ville) %>%
    summarise(Naissances=100*sum(Nombre)/sum(NaissancesVille)) %>%
    group_by(Ville) %>%
    slice_max(order_by = Naissances, n = 10) -> mostGivenNames
  ggplot(mostGivenNames,aes(x=reorder(Prénom,Naissances),y=Naissances)) +
    ggtitle("Les 10 noms les plus donnés par ville") +
    geom_bar(stat='Identity',aes(fill=reorder(Prénom,Naissances))) +
    facet_wrap("Ville") -> p3

  # Select for each year the top 5 given names by sex and their evolution through the years by city
  tryCatch(
    {
      prenoms[prenoms$Sexe == "M",] %>%
        group_by(Ville,Année) %>%
        slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
      prenoms[prenoms$Sexe == "F",] %>%
        group_by(Ville,Année) %>%
        slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
      mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
      mostGivenNamesBySexeAndYear %>%
        group_by(Prénom,Ville,Année) %>%
        mutate(Naissances=100*sum(Nombre)/sum(NaissancesVilleAnnée)) -> mostGivenNamesBySexeAndYearNormalized
      ggplot(mostGivenNamesBySexeAndYearNormalized,aes(x=Année,y=Naissances)) +
        ggtitle("Top 5 des noms donnés, par sexe, par an et par ville") +
        geom_line(aes(color=Prénom)) +
        geom_point(aes(color=Prénom,shape=Sexe),size=3) +
        geom_dl(aes(label=Prénom), method=list(dl.combine("first.points")), cex = 0.8) +
        facet_wrap("Ville")
    },
    error=function(e) e
  ) -> p4

  # Plot the average number of letters by year and by city
  prenoms %>%
    group_by(Année,Ville) %>%
    summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
  ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
    ggtitle("Nombre moyen de lettres dans les prénoms par année et par ville") +
    geom_point(aes(color=Ville)) +
    geom_line(aes(color=Ville)) -> p5

  # Plot the average number of vowels/consonants by year and by city
  prenoms %>%
    group_by(Ville, Année) %>%
    summarise(
      Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
      Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
    ) -> averageNumberOfVowelsAndConsonantsByYear
  ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
    ggtitle("Nombre moyen de voyelles dans les prénoms par année et par ville") +
    geom_point(aes(color=Ville)) +
    geom_line(aes(color=Ville)) -> p6
  ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
    ggtitle("Nombre moyen de consonnes dans les prénoms par année et par ville") +
    geom_point(aes(color=Ville)) +
    geom_line(aes(color=Ville)) -> p7

  # How the number of composed names changes through the years
  prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
    group_by(Année,Ville) %>%
    summarise(nb_composedNames=100*sum(Nombre)/NaissancesVilleAnnée) -> composedNamesByYear
  ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
    ggtitle("Nombre de prénoms composés par année et par ville") +
    geom_point(aes(color=Ville)) +
    geom_line(aes(color=Ville)) -> p8

  # Defining a "hype" criterion and finding the hypest names
  prenoms %>%
    group_by(Année,Ville) -> namesByYearAndCities
  namesByYearAndCities$nbAnnéePrec <- lag(namesByYearAndCities$Nombre)
  namesByYearAndCities$difference <- namesByYearAndCities$Nombre - namesByYearAndCities$nbAnnéePrec
  namesByYearAndCities %>%
    group_by(Année,Ville) %>%
    slice_max(order_by = difference, n = 1) -> hypestNameByYearAndCities
  ggplot(hypestNameByYearAndCities,aes(x=factor(Année),y=difference/NaissancesVilleAnnée)) +
    ggtitle("Prénom le plus hype par année et par ville") +
    geom_text(aes(label=Prénom,color=Prénom)) +
    facet_wrap("Ville") -> p9

  return(list(p1,p2,p3,p4,p5,p6,p7,p8,p9))
}

combinedParisToulouse <- nameStudyCombined(allPrenoms)
combinedParisToulouse

# The most unshared names
allPrenoms %>%
  group_by(Ville) %>%
  mutate(NaissancesVille=sum(Nombre)) %>%
  group_by(Prénom,Ville) %>%
  mutate(Proportion=sum(Nombre)/NaissancesVille) %>%
  group_by(Prénom,Ville) -> namesByCities
namesByCities$autreVilleProportion <- lag(namesByCities$Proportion)
namesByCities$difference <- namesByCities$Proportion - namesByCities$autreVilleProportion
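
One possible way to finish this "most unshared names" comparison (a sketch only; it assumes exactly the two cities Toulouse and Paris in `allPrenoms` and uses `tidyr::pivot_wider`, which is not used elsewhere in this script):

allPrenoms %>%
  group_by(Ville) %>%
  mutate(NaissancesVille = sum(Nombre)) %>%
  group_by(Prénom, Ville) %>%
  summarise(Proportion = sum(Nombre) / first(NaissancesVille), .groups = "drop") %>%
  pivot_wider(names_from = Ville, values_from = Proportion, values_fill = 0) %>%
  mutate(difference = abs(Toulouse - Paris)) %>%  # gap between the two cities' shares of a name
  slice_max(order_by = difference, n = 10)        # the 10 most "unshared" names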

### A Tale of many cities
# read the table prenomsRennesStrassNantesToul.csv
prenomsRennesStrassNantesToul <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenomsRennesStrassNantesToul.csv',sep=";")
prenomsRennesStrassNantesToul <- rename(prenomsRennesStrassNantesToul, Année = ANNAISS)
prenomsRennesStrassNantesToul <- rename(prenomsRennesStrassNantesToul, Ville = LBCOM)
prenomsRennesStrassNantesToul <- rename(prenomsRennesStrassNantesToul, Sexe = SEX)
prenomsRennesStrassNantesToul <- rename(prenomsRennesStrassNantesToul, Prénom = PRN)
prenomsRennesStrassNantesToul <- rename(prenomsRennesStrassNantesToul, Nombre = NRB)
prenomsRennesStrassNantesToul[prenomsRennesStrassNantesToul$Ville=="RENNES",] -> a
combinedRennesStrassNantesToul <- nameStudyCombined(prenomsRennesStrassNantesToul)
combinedRennesStrassNantesToul[1]
combinedRennesStrassNantesToul[2]
combinedRennesStrassNantesToul[3]
combinedRennesStrassNantesToul[4]
combinedRennesStrassNantesToul[5]
combinedRennesStrassNantesToul[6]
combinedRennesStrassNantesToul[7]
combinedRennesStrassNantesToul[8]
combinedRennesStrassNantesToul[9]
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo2.pdf
Normal file
Binary file not shown.
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo3.pdf
Normal file
Binary file not shown.
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo4.pdf
Normal file
Binary file not shown.
File diff suppressed because it is too large
5251
tp-analyse-explorative-master/tp-analyse-explorative/prenoms.csv
Normal file
File diff suppressed because it is too large
11001
tp-analyse-explorative-master/tp-analyse-explorative/prenomsParis.csv
Normal file
File diff suppressed because it is too large
File diff suppressed because it is too large