mercredi soir
This commit is contained in:
commit
2ed6382fd8
32 changed files with 1659524 additions and 0 deletions
BIN
.RData
Normal file
Binary file not shown.
136
.Rhistory
Normal file
|
@ -0,0 +1,136 @@
|
|||
library("jsonlite")
|
||||
data<-fromJSON(file="C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
|
||||
data<-fromJSON("C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
|
||||
dfAnimes <- as.data.frame(data)
|
||||
library("tidyverse")
|
||||
install.packages("tidyverse")
|
||||
library("tidyverse")
|
||||
dfAnimes
|
||||
# Cleaning up unused columns
|
||||
dfAnimes <- select(dfAnimes, data.title, data.type, data.episodes, data.status, data.animeSeason, data.tags)
|
||||
# Number of anime running longer than 2 cours (24 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>25) %>% longbois
|
||||
# Number of anime running longer than 2 cours (24 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>25) -> longbois
|
||||
# Number of anime running longer than 2 cours (24 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>30) -> longbois
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(data.animeSeason) %>%
|
||||
count()-> longbois
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(data.animeSeason) %>%
|
||||
count(nbAnimes)-> longbois
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(data.animeSeason) %>%
|
||||
count() %>%
|
||||
rename(n = nbAnimes) -> longbois
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year
|
||||
dfAnimes %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(data.animeSeason) %>%
|
||||
count() %>%
|
||||
rename(nbAnimes = n) -> longbois
|
||||
longbois %>% ggplot(aes(data, nbAnimes)) + ggcol()
|
||||
longbois %>% ggplot(aes(data, nbAnimes)) + ggcols()
|
||||
longbois %>% ggplot(aes(data, nbAnimes)) + geom_cols()
|
||||
longbois %>% ggplot(aes(data, nbAnimes)) + geom_col()
|
||||
longbois %>% ggplot(aes(data.animeSeason, nbAnimes)) + geom_col()
|
||||
longbois %>% ggplot(aes(data.animeSeason$year, nbAnimes)) + geom_col()
|
||||
# Evolution of the number of letters (mean and median) in titles per year
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
summarise(Lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
View(meanLettersByYear)
|
||||
# Evolution of the number of letters (mean and median) in titles per year
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
mutate(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
|
||||
dfAnimes %>%
|
||||
group_by(data.animeSeason$year) %>%
|
||||
summarise(lettres=median(nchar(data.title))) %>%
|
||||
rename(annee = data.animeSeason$year)-> medLettersByYear
|
||||
# Splitting the season column into separate year and season columns
|
||||
dfAnimes %>%
|
||||
mutate(annee = data.animeSeason$year) %>%
|
||||
mutate(saison = data.animeSeason$season) -> dfAnimes
|
||||
dfAnimes %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
View(medLettersByYear)
|
||||
View(dfAnimes)
|
||||
View(dfAnimes)
|
||||
# Evolution of the number of letters (mean and median) in titles per year
|
||||
dfAnimes %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
View(meanLettersByYear)
|
||||
meanLettersByYear %>% ggplot(aes(data.animeSeason$year, lettres)) + geom_col()
|
||||
meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
||||
dfAnimes %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
||||
filter(data, data.animeSeason$year > 1924) -> post1924
|
||||
filter(dfAnimes, dfAnimes.animeSeason$year > 1924) -> post1924
|
||||
dfAnimes
|
||||
filter(dfAnimes, dfAnimes$annee > 1924) -> post1924
|
||||
filter(post1924,post1924$annee < 2022) -> animeCentury
|
||||
animeCentury
|
||||
View(dfAnimes)
|
||||
View(dfAnimes)
|
||||
# Filtering out anime released before 1925 or after 2021, as well as OVAs, movies, etc.
|
||||
dfAnimes %>%
|
||||
filter(annee < 1924) %>%
|
||||
filter(annee > 2021) %>%
|
||||
filter(data.type == "TV") -> animeCentury
|
||||
# Filtering out anime released before 1925 or after 2021, as well as OVAs, movies, etc.
|
||||
dfAnimes %>%
|
||||
filter(annee < 1924) %>%
|
||||
filter(annee > 2021) -> animeCentury
|
||||
# Filtering out anime released before 1925 or after 2021, as well as OVAs, movies, etc.
|
||||
dfAnimes %>%
|
||||
filter(annee > 1924) %>%
|
||||
filter(annee < 2021) %>%
|
||||
filter(data.type == "TV") -> animeCentury
|
||||
View(animeCentury)
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year => to be improved
|
||||
animeCentury %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(annee) %>%
|
||||
count() %>%
|
||||
rename(nbAnimes = n) -> longbois
|
||||
longbois %>% ggplot(aes(annee, nbAnimes)) + geom_col()
|
||||
# Filtering out anime released before 1925 or after 2021, as well as OVAs, movies, etc.
|
||||
dfAnimes %>%
|
||||
filter(annee > 1960) %>%
|
||||
filter(annee < 2021) %>%
|
||||
filter(data.type == "TV") -> animeCentury
|
||||
# Evolution of the number of letters (mean and median) in titles per year
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
||||
medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
66
anime-offline-database-master/.github/CONTRIBUTING.md
External
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Contribution guidelines
|
||||
Please read the FAQ below.
|
||||
|
||||
## Possible errors / problems in the database
|
||||
If you find something that, in your opinion, could be the result of incorrectly extracted data, please submit an issue rather than creating a pull request, because the database is created by an automated process.
|
||||
|
||||
## Adding your project to the list of projects using this database
|
||||
If you have a project that uses this database and want to add it to the list of projects using it, create a pull request adding it to the table. Do not create an issue asking me or anyone else to add it.
|
||||
|
||||
+ You have to be the author/maintainer of the project that you want to add
|
||||
+ Create a PR in which you add it to the table in the README.md
|
||||
+ Do not change/alter anything else
|
||||
+ Your project has to use this database
|
||||
+ You have to have a link back to this project in the README.md of your project
|
||||
+ The README.md of your project has to be in English or it must have an English translation
|
||||
+ Your project has to be hosted either on GitHub or GitLab
|
||||
+ The table is sorted by project name (ascending). Add your entry accordingly.
|
||||
+ Project name must match the repository name and link directly to the source code (not a project page such as YOURNAME.github.io)
|
||||
+ Put your name under _Author/Maintainer_ with a link to your profile.
|
||||
+ Add a meaningful description in English. The description must not be longer than 150 characters.
|
||||
|
||||
# FAQ
|
||||
|
||||
## What do you mean by 'meta data provider'?
|
||||
Websites which provide information about anime such as `myanimelist.net`, `notify.moe`, ...
|
||||
|
||||
## Can you please add additional data/properties?
|
||||
No. The dataset has been created for my own tool. It contains all data/properties that I need and I won't add more data/properties. This is merely an index. The idea is to visit the meta data provider of your choice to get additional information about the anime.
|
||||
|
||||
## Can you please add an additional meta data provider?
|
||||
No. I don't plan to add any additional meta data provider.
|
||||
|
||||
## Can you please change the structure of the file?
|
||||
No. The file has the structure that it needs to have for the purpose it has been built for.
|
||||
|
||||
## There are duplicates in the dataset.
|
||||
If the entry of one meta data provider is not merged with an entry of a different meta data provider, although they are practically the same entry, then this is **not a duplicate**.
|
||||
They are simply not merged together. This can happen and it is intentional. Since this dataset is created automatically, two entries should rather be left unmerged than falsely merged together.
|
||||
If you query this dataset based on titles/synonyms it might seem that there are duplicates. However, the intended usage is to query by the URL of the meta data provider. This way you will always retrieve the entry that you want. Entries being merged together is just a nice-to-have.
|
||||
|
||||
A duplicate, by the definition of this dataset, is an entry which contains multiple links of the same meta data provider in `sources`.
|
||||
|
||||
## Why are there no IDs?
|
||||
There are. The entries under `sources` are the IDs. Each one of the array's URLs is a key for that specific entry.
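For illustration only (not something this repository ships), a minimal R sketch of such a lookup, assuming the database file has been downloaded locally; the path is a placeholder and the example URL is taken from the README example:

```r
library(jsonlite)
library(dplyr)

# Parse the database; one row per anime entry, `sources` is a list of URLs.
db <- fromJSON("anime-offline-database.json")$data

# Use a source URL as the key for the entry it belongs to.
key <- "https://myanimelist.net/anime/1535"
db %>%
  filter(sapply(sources, function(s) key %in% s)) %>%
  pull(title)
```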
|
||||
|
||||
## Is this dataset created automatically or manually?
|
||||
It is created automatically and reviewed in a half-automated process.
|
||||
|
||||
## Do you plan to open source the code which creates this dataset?
|
||||
Yes. Parts of the code are already [available](https://github.com/manami-project?tab=repositories&q=modb&type=source). However, there is still work to do before I can/want to open source the rest, and that doesn't have any priority right now.
|
||||
|
||||
## How do you split entries?
|
||||
Entries are split if one meta data provider lists multiple entries as one and others don't.
|
||||
**Example:**
|
||||
* The entry of a meta data provider which lists 3 movies as a single entry is split to match the three separate entries of another meta data provider
|
||||
* A series is listed as one entry having 26 episodes on one meta data provider and as two entries having 13 episodes each on the other meta data provider
|
||||
|
||||
However, if one entry is listed with 13 episodes whereas the other is listed with 12 because it doesn't count the recap episode, then these entries are still merged together.
|
||||
|
||||
## Can I somehow contribute?
|
||||
Currently I can't think of a way. But you can check the [predefined issue templates](https://github.com/manami-project/anime-offline-database/issues/new/choose) in case you want to report one of the covered cases.
|
||||
|
||||
## Does this dataset contain all anime from the supported meta data provider?
|
||||
No. MAL and anisearch are the only providers which list adult titles publicly, so this type of anime is missing for the other meta data providers.
|
||||
If there are new entries which have been created after an update then those obviously won't appear until the next update.
|
||||
Apart from that it should contain all titles from the supported meta data providers.
|
8
anime-offline-database-master/.github/ISSUE_TEMPLATE/config.yml
External
Normal file
|
@ -0,0 +1,8 @@
|
|||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Guide to add your project to the project list.
|
||||
url: https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#adding-your-project-to-the-list-of-projects-using-this-database
|
||||
about: How to add your project to the list of projects using this database.
|
||||
- name: FAQ
|
||||
url: https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq
|
||||
about: Frequently Asked Questions
|
51
anime-offline-database-master/.github/ISSUE_TEMPLATE/falsely-merged-entries.md
External
Normal file
|
@ -0,0 +1,51 @@
|
|||
---
|
||||
name: Falsely merged entry
|
||||
about: Entries have been merged together although they should be separate entries?
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: manami-project
|
||||
|
||||
---
|
||||
|
||||
Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.
|
||||
In particular, see the sections on [duplicates](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#there-are-duplicates-in-the-data-set) and [splits](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#how-do-you-split-entries). Please refrain from creating issues stating that entries should be merged together. This is only for _splitting_ entries which have already been merged together, but should be separated.
|
||||
**Only one entry per issue**
|
||||
|
||||
## Which entry should be split? (original from data set)
|
||||
|
||||
**Example:**
|
||||
```
|
||||
"https://anidb.net/anime/9466",
|
||||
"https://anilist.co/anime/15809",
|
||||
"https://anime-planet.com/anime/the-devil-is-a-part-timer",
|
||||
"https://kitsu.io/anime/7314",
|
||||
"https://myanimelist.net/anime/15809",
|
||||
"https://notify.moe/anime/CGnFpKimR"
|
||||
"https://anidb.net/anime/16104",
|
||||
"https://anilist.co/anime/130592",
|
||||
"https://anime-planet.com/anime/the-devil-is-a-part-timer-2",
|
||||
"https://kitsu.io/anime/44113",
|
||||
"https://myanimelist.net/anime/48413",
|
||||
"https://notify.moe/anime/Zy3-TV8MR"
|
||||
```
|
||||
|
||||
## How should it be split?
|
||||
|
||||
**Example:**
|
||||
```
|
||||
"https://anidb.net/anime/9466",
|
||||
"https://anilist.co/anime/15809",
|
||||
"https://anime-planet.com/anime/the-devil-is-a-part-timer",
|
||||
"https://kitsu.io/anime/7314",
|
||||
"https://myanimelist.net/anime/15809",
|
||||
"https://notify.moe/anime/CGnFpKimR"
|
||||
```
|
||||
|
||||
```
|
||||
"https://anidb.net/anime/16104",
|
||||
"https://anilist.co/anime/130592",
|
||||
"https://anime-planet.com/anime/the-devil-is-a-part-timer-2",
|
||||
"https://kitsu.io/anime/44113",
|
||||
"https://myanimelist.net/anime/48413",
|
||||
"https://notify.moe/anime/Zy3-TV8MR"
|
||||
```
|
19
anime-offline-database-master/.github/ISSUE_TEMPLATE/problem-in-data-extraction.md
External
Normal file
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
name: Problem in data extraction
|
||||
about: Is there a problem in the data extraction?
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: manami-project
|
||||
|
||||
---
|
||||
|
||||
Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.
|
||||
|
||||
|
||||
* Which data is not extracted correctly? (e.g. title, episodes...)
|
||||
|
||||
|
||||
* Can you provide an example entry?
|
||||
|
||||
|
||||
* Which value is expected?
|
10
anime-offline-database-master/.github/ISSUE_TEMPLATE/question.md
External
Normal file
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
name: Question
|
||||
about: You have a question which was not covered by the FAQ?
|
||||
title: ''
|
||||
labels: question
|
||||
assignees: manami-project
|
||||
|
||||
---
|
||||
|
||||
Please read the [FAQ](https://github.com/manami-project/anime-offline-database/blob/master/.github/CONTRIBUTING.md#faq) first.
|
37
anime-offline-database-master/.github/workflows/json_lint.yml
External
Normal file
|
@ -0,0 +1,37 @@
|
|||
name: Check JSON files
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- '**'
|
||||
paths-ignore:
|
||||
- 'README.md'
|
||||
- '.gitignore'
|
||||
- '.gitattributes'
|
||||
- '.github/**/*'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Setup node environment
|
||||
uses: actions/setup-node@v1
|
||||
with:
|
||||
node-version: '14'
|
||||
- name: Install jsonlint
|
||||
run: npm install jsonlint -g
|
||||
- name: Check anime-offline-database.json
|
||||
run: jsonlint -q anime-offline-database.json
|
||||
- name: Check anime-offline-database-minified.json
|
||||
run: jsonlint -q anime-offline-database-minified.json
|
||||
- name: Check dead-entries for anidb
|
||||
run: jsonlint -q dead-entries/anidb.json
|
||||
- name: Check dead-entries for anilist
|
||||
run: jsonlint -q dead-entries/anilist.json
|
||||
- name: Check dead-entries for kitsu
|
||||
run: jsonlint -q dead-entries/kitsu.json
|
||||
- name: Check dead-entries for livechart
|
||||
run: jsonlint -q dead-entries/livechart.json
|
||||
- name: Check dead-entries for myanimelist
|
||||
run: jsonlint -q dead-entries/myanimelist.json
|
29
anime-offline-database-master/.gitignore
External
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
|
||||
!.gitignore
|
||||
!README.md
|
||||
!anime-offline-database.json
|
||||
!anime-offline-database-minified.json
|
||||
|
||||
!.github/
|
||||
.github/*
|
||||
!.github/CONTRIBUTING.md
|
||||
|
||||
!.github/workflows/
|
||||
.github/workflows/*
|
||||
!.github/workflows/json_lint.yml
|
||||
|
||||
!.github/ISSUE_TEMPLATE/
|
||||
.github/ISSUE_TEMPLATE/*
|
||||
!.github/ISSUE_TEMPLATE/problem-in-data-extraction.md
|
||||
!.github/ISSUE_TEMPLATE/question.md
|
||||
!.github/ISSUE_TEMPLATE/falsely-merged-entries.md
|
||||
!.github/ISSUE_TEMPLATE/config.yml
|
||||
|
||||
!dead-entries/
|
||||
dead-entries/*
|
||||
!dead-entries/anidb.json
|
||||
!dead-entries/anilist.json
|
||||
!dead-entries/kitsu.json
|
||||
!dead-entries/myanimelist.json
|
||||
!dead-entries/livechart.json
|
229
anime-offline-database-master/README.md
Normal file
|
@ -0,0 +1,229 @@
|
|||

|
||||
# anime-offline-database
|
||||
The purpose of this repository is to create an offline database containing anime meta data aggregated from different anime meta data providers (such as myanimelist.net, anidb.net, kitsu.io and more) and to allow cross-references between those meta data providers. This file is created for and is supposed to be used by [manami](https://github.com/manami-project/manami).
|
||||
|
||||
**The goal is to deliver at least weekly updates.**
|
||||
|
||||
## Statistics
|
||||
Update **week 48 [2021]**
|
||||
|
||||
The database consists of **33043** entries composed of:
|
||||
+ 23233 entries from myanimelist.net
|
||||
+ 18215 entries from anime-planet.com
|
||||
+ 17231 entries from kitsu.io
|
||||
+ 16208 entries from anisearch.com
|
||||
+ 15526 entries from anilist.co
|
||||
+ 15175 entries from notify.moe
|
||||
+ 12127 entries from anidb.net
|
||||
+ 9562 entries from livechart.me
|
||||
|
||||
Missed updates:
|
||||
+ **2021:** 0 _(so far)_
|
||||
+ **2020:** 0
|
||||
+ **2019:** 2
|
||||
+ **2018:** 1
|
||||
|
||||
## Structure
|
||||
This repository contains various JSON files: the database file itself as well as, for each meta data provider, one file containing the IDs of dead entries, which supports the automated process.
|
||||
|
||||
### anime-offline-database-minified.json
|
||||
|
||||
Minified version of `anime-offline-database.json` which contains the same data, but is smaller in size.
|
||||
|
||||
### anime-offline-database.json
|
||||
|
||||
#### Data types
|
||||
|
||||
**Root**
|
||||
| Field | Type | Nullable |
|
||||
| --- | --- | --- |
|
||||
| data | ```Anime[]``` | no |
|
||||
|
||||
**Anime**
|
||||
| Field | Type | Nullable |
|
||||
| --- | --- | --- |
|
||||
| sources | ```URL[]``` | no |
|
||||
| title | ```String``` | no |
|
||||
| type | ```Enum of [TV, MOVIE, OVA, ONA, SPECIAL, UNKNOWN]``` | no |
|
||||
| episodes | ```Integer``` | no |
|
||||
| status | ```Enum of [FINISHED, ONGOING, UPCOMING, UNKNOWN]``` | no |
|
||||
| animeSeason | ```AnimeSeason``` | no |
|
||||
| picture | ```URL``` | no |
|
||||
| thumbnail | ```URL``` | no |
|
||||
| synonyms | ```String[]``` | no |
|
||||
| relations | ```URL[]``` | no |
|
||||
| tags | ```String[]``` | no |
|
||||
|
||||
**AnimeSeason**
|
||||
| Field | Type | Nullable |
|
||||
| --- | --- | --- |
|
||||
| season | ```Enum of [SPRING, SUMMER, FALL, WINTER, UNDEFINED]``` | no |
|
||||
| year | ```Integer``` | yes |
|
||||
|
||||
#### Example:
|
||||
|
||||
```json
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
"sources": [
|
||||
"https://anidb.net/anime/4563",
|
||||
"https://anilist.co/anime/1535",
|
||||
"https://anime-planet.com/anime/death-note",
|
||||
"https://anisearch.com/anime/3633",
|
||||
"https://kitsu.io/anime/1376",
|
||||
"https://livechart.me/anime/3437",
|
||||
"https://myanimelist.net/anime/1535",
|
||||
"https://notify.moe/anime/0-A-5Fimg"
|
||||
],
|
||||
"title": "Death Note",
|
||||
"type": "TV",
|
||||
"episodes": 37,
|
||||
"status": "FINISHED",
|
||||
"animeSeason": {
|
||||
"season": "FALL",
|
||||
"year": 2006
|
||||
},
|
||||
"picture": "https://cdn.myanimelist.net/images/anime/9/9453.jpg",
|
||||
"thumbnail": "https://cdn.myanimelist.net/images/anime/9/9453t.jpg",
|
||||
"synonyms": [
|
||||
"Bilježnica smrti",
|
||||
"Caderno da Morte",
|
||||
"Carnet de la Mort",
|
||||
"DEATH NOTE",
|
||||
"DN",
|
||||
"Death Note - A halállista",
|
||||
"Death Note - Carnetul morţii",
|
||||
"Death Note - Zápisník smrti",
|
||||
"Mirties Užrašai",
|
||||
"Notatnik śmierci",
|
||||
"Notes Śmierci",
|
||||
"Quaderno della Morte",
|
||||
"Sveska Smrti",
|
||||
"Ölüm Defteri",
|
||||
"Τετράδιο Θανάτου",
|
||||
"Бележник на Смъртта",
|
||||
"Записник Смерті",
|
||||
"Свеска Смрти",
|
||||
"Тетрадка на Смъртта",
|
||||
"Тетрадь cмерти",
|
||||
"Үхлийн Тэмдэглэл",
|
||||
"מחברת המוות",
|
||||
"دفترچه مرگ",
|
||||
"دفترچه یادداشت مرگ",
|
||||
"كـتـاب الـموت",
|
||||
"مدونة الموت",
|
||||
"مذكرة الموت",
|
||||
"موت نوٹ",
|
||||
"डेथ नोट",
|
||||
"ですのーと",
|
||||
"デスノート",
|
||||
"死亡笔记",
|
||||
"데스노트"
|
||||
],
|
||||
"relations": [
|
||||
"https://anidb.net/anime/8146",
|
||||
"https://anidb.net/anime/8147",
|
||||
"https://anilist.co/anime/2994",
|
||||
"https://anime-planet.com/anime/death-note-rewrite-1-visions-of-a-god",
|
||||
"https://anime-planet.com/anime/death-note-rewrite-2-ls-successors",
|
||||
"https://anisearch.com/anime/4441",
|
||||
"https://anisearch.com/anime/5194",
|
||||
"https://kitsu.io/anime/2707",
|
||||
"https://livechart.me/anime/3808",
|
||||
"https://myanimelist.net/anime/2994",
|
||||
"https://notify.moe/anime/DBBU5Kimg"
|
||||
],
|
||||
"tags": [
|
||||
"alternative present",
|
||||
"amnesia",
|
||||
"anti-hero",
|
||||
"asexual",
|
||||
"asia",
|
||||
"based on a manga",
|
||||
"contemporary fantasy",
|
||||
"cops",
|
||||
"crime",
|
||||
"crime fiction",
|
||||
"criminals",
|
||||
"detective",
|
||||
"detectives",
|
||||
"drama",
|
||||
"earth",
|
||||
"espionage",
|
||||
"fantasy",
|
||||
"genius",
|
||||
"gods",
|
||||
"hero of strong character",
|
||||
"horror",
|
||||
"japan",
|
||||
"kamis",
|
||||
"kuudere",
|
||||
"male protagonist",
|
||||
"manga",
|
||||
"mind games",
|
||||
"mystery",
|
||||
"overpowered main characters",
|
||||
"philosophy",
|
||||
"plot continuity",
|
||||
"police",
|
||||
"policeman",
|
||||
"present",
|
||||
"primarily adult cast",
|
||||
"primarily male cast",
|
||||
"psychological",
|
||||
"psychological drama",
|
||||
"psychopaths",
|
||||
"revenge",
|
||||
"rivalries",
|
||||
"secret identity",
|
||||
"serial killers",
|
||||
"shinigami",
|
||||
"shounen",
|
||||
"supernatural",
|
||||
"supernatural drama",
|
||||
"thriller",
|
||||
"time skip",
|
||||
"tragedy",
|
||||
"twisted story",
|
||||
"university",
|
||||
"urban",
|
||||
"urban fantasy",
|
||||
"vigilantes"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
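A minimal R sketch of reading this structure, along the lines of what `main.R` in this commit does (the path is a placeholder for a local copy of the file):

```r
library(jsonlite)
library(tidyverse)

# Parse the JSON; the anime entries live under the top-level "data" field.
data <- fromJSON("anime-offline-database.json")
dfAnimes <- as.data.frame(data)

# Column names get a "data." prefix (data.title, data.type, data.episodes, ...)
# and the nested season object stays reachable as data.animeSeason$year / $season.
dfAnimes %>%
  mutate(annee = data.animeSeason$year, saison = data.animeSeason$season) %>%
  select(data.title, data.type, data.episodes, annee, saison) %>%
  head()
```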
|
||||
|
||||
### dead-entries
|
||||
Contains IDs which have been removed from the database of the corresponding meta data provider.
|
||||
|
||||
#### Data types
|
||||
|
||||
| Field | Type | Nullable |
|
||||
| --- | --- | --- |
|
||||
| deadEntries | ```String[]``` | no |
|
||||
|
||||
#### Example
|
||||
|
||||
```json
|
||||
{
|
||||
"deadEntries": [
|
||||
"38492",
|
||||
"38518",
|
||||
"38522",
|
||||
"38531"
|
||||
]
|
||||
}
|
||||
```
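Purely as an illustrative sketch (this repository does not prescribe how these files are consumed), a dead-entries file can be read the same way; here the MyAnimeList IDs are turned back into the URLs they correspond to, so stale `sources` links could be filtered out. The file name and URL prefix follow the examples in this README:

```r
library(jsonlite)

# IDs that have been removed from the corresponding meta data provider.
dead <- fromJSON("dead-entries/myanimelist.json")$deadEntries

# Rebuild the source URLs these IDs map to.
dead_urls <- paste0("https://myanimelist.net/anime/", dead)
head(dead_urls)
```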
|
||||
|
||||
## Other projects using this database
|
||||
If you have a project that uses this database and you want to add it to this list, please read the [contribution guidelines](./.github/CONTRIBUTING.md) first.
|
||||
|
||||
|Project|Author/Maintainer|Short description|
|
||||
|----|----|----|
|
||||
|[adb-zeppelin-statistics](https://github.com/manami-project/adb-zeppelin-statistics)|[manami-project](https://github.com/manami-project)|A set of statistics and insights about anime on MAL.|
|
||||
|[animanga-wordlist](https://github.com/ryuuganime/animanga-wordlist)|[ryuuganime](https://github.com/ryuuganime)|Japanese Anime, Manga, Characters, and Studio Word List/Dictionary|
|
||||
|[arm-server](https://github.com/BeeeQueue/arm-server)|[BeeeQueue](https://github.com/BeeeQueue)|A REST API for querying this database.|
|
||||
|[manami](https://github.com/manami-project/manami)|[manami-project](https://github.com/manami-project)|A tool to catalog anime on your hard drive and discover new anime to watch.|
|
File diff suppressed because one or more lines are too long
1442843
anime-offline-database-master/anime-offline-database.json
Normal file
File diff suppressed because it is too large
4532
anime-offline-database-master/dead-entries/anidb.json
Normal file
File diff suppressed because it is too large
126697
anime-offline-database-master/dead-entries/anilist.json
Normal file
File diff suppressed because it is too large
28317
anime-offline-database-master/dead-entries/kitsu.json
Normal file
File diff suppressed because it is too large
1356
anime-offline-database-master/dead-entries/livechart.json
Normal file
File diff suppressed because it is too large
27259
anime-offline-database-master/dead-entries/myanimelist.json
Normal file
File diff suppressed because it is too large
52
main.R
Normal file
|
@ -0,0 +1,52 @@
|
|||
library("jsonlite")
|
||||
library("tidyverse")
|
||||
|
||||
|
||||
|
||||
data<-fromJSON("C:\\Users\\Marianne\\Desktop\\projet-analyse-exploratoire\\anime-offline-database-master\\anime-offline-database.json")
|
||||
dfAnimes <- as.data.frame(data)
|
||||
|
||||
# Cleaning up unused columns
|
||||
dfAnimes <- select(dfAnimes, data.title, data.type, data.episodes, data.status, data.animeSeason, data.tags)
|
||||
|
||||
# Splitting the season column into separate year and season columns
|
||||
dfAnimes %>%
|
||||
mutate(annee = data.animeSeason$year) %>%
|
||||
mutate(saison = data.animeSeason$season) -> dfAnimes
|
||||
|
||||
# Filtering out anime released before 1960 (no TV back then :() or after 2021, as well as OVAs, movies, etc.
|
||||
dfAnimes %>%
|
||||
filter(annee > 1960) %>%
|
||||
filter(annee < 2021) %>%
|
||||
filter(data.type == "TV") -> animeCentury
|
||||
|
||||
|
||||
|
||||
# Number of anime running longer than 2 cours (estimated at 30 episodes) per year => to be improved
|
||||
animeCentury %>%
|
||||
filter(data.episodes>30) %>%
|
||||
group_by(annee) %>%
|
||||
count() %>%
|
||||
rename(nbAnimes = n) -> longbois
|
||||
|
||||
longbois %>% ggplot(aes(annee, nbAnimes)) + geom_col()
|
||||
|
||||
# Evolution of the number of letters (mean and median) in titles per year
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=mean(nchar(data.title))) -> meanLettersByYear
|
||||
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
summarise(lettres=median(nchar(data.title))) -> medLettersByYear
|
||||
|
||||
meanLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
||||
|
||||
medLettersByYear %>% ggplot(aes(annee, lettres)) + geom_col()
|
||||
|
||||
# Evolution of the 5 most represented tags
|
||||
animeCentury %>%
|
||||
group_by(annee) %>%
|
||||
|
||||
|
||||
|
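# A sketch of how the unfinished block above ("Evolution of the 5 most represented tags")
# could be completed; not part of this commit. It assumes the data.tags list-column and
# the animeCentury data frame defined earlier; the top-5 cut-off and the line plot are
# choices of this sketch.
tagsLong <- animeCentury %>%
  select(annee, data.tags) %>%
  unnest(cols = data.tags)          # one row per (anime, tag) pair

topTags <- tagsLong %>%             # the 5 most frequent tags overall
  count(data.tags, sort = TRUE) %>%
  slice_head(n = 5) %>%
  pull(data.tags)

tagsLong %>%                        # yearly counts for those tags, one line per tag
  filter(data.tags %in% topTags) %>%
  count(annee, data.tags, name = "nbAnimes") %>%
  ggplot(aes(annee, nbAnimes, color = data.tags)) +
  geom_line()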
BIN
tp-analyse-explorative-master.zip
Normal file
Binary file not shown.
35
tp-analyse-explorative-master/tp-analyse-explorative/.gitignore
External
Normal file
|
@ -0,0 +1,35 @@
|
|||
# ---> R
|
||||
# History files
|
||||
.Rhistory
|
||||
.Rapp.history
|
||||
|
||||
# Session Data files
|
||||
.RData
|
||||
|
||||
# Example code in package build process
|
||||
*-Ex.R
|
||||
|
||||
# Output files from R CMD build
|
||||
/*.tar.gz
|
||||
|
||||
# Output files from R CMD check
|
||||
/*.Rcheck/
|
||||
|
||||
# RStudio files
|
||||
.Rproj.user/
|
||||
|
||||
# produced vignettes
|
||||
vignettes/*.html
|
||||
vignettes/*.pdf
|
||||
|
||||
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
|
||||
.httr-oauth
|
||||
|
||||
# knitr and R markdown default cache directories
|
||||
/*_cache/
|
||||
/cache/
|
||||
|
||||
# Temporary files created by R markdown
|
||||
*.utf8.md
|
||||
*.knit.md
|
||||
|
Binary file not shown.
|
@ -0,0 +1,2 @@
|
|||
# tp-analyse-explorative
|
||||
|
125
tp-analyse-explorative-master/tp-analyse-explorative/exo1.R
Normal file
|
@ -0,0 +1,125 @@
|
|||
### 1 Vectors ###
|
||||
|
||||
### 1.1 Let's start simple
|
||||
e1 = c(2,5,0,8)
|
||||
e2 = 1:200
|
||||
e3 = seq(-210,-200,2)
|
||||
e4 = 2^(1:7) # note: 2^1:7 parses as (2^1):7 because ^ binds tighter than :
|
||||
v = rep(c(1,-1),times=25)
|
||||
e5 = c(e2,e3)
|
||||
?seq
|
||||
e6 = seq(0,1,length=70)
|
||||
e7 = rep(e1,times=10)
|
||||
e2-e3
|
||||
|
||||
|
||||
### 1.2 Character vectors
|
||||
# All vowels
|
||||
vowels = c('a','e','i','o','u','y')
|
||||
# All letters
|
||||
letters = letters
|
||||
# True or false if vowels are in letters
|
||||
letters %in% vowels
|
||||
# Index of each vowels
|
||||
vowelsInLetters = which(letters %in% vowels)
|
||||
# Index of each non-vowels
|
||||
notVowelsInLetters = which(!(letters %in% vowels))
|
||||
# Letters after vowels
|
||||
lettersAfterVowels = letters[vowelsInLetters+1]
|
||||
# myname with my name
|
||||
myname = "Titouan"
|
||||
# strsplit to extract letters
|
||||
mynameSplited = strsplit(myname, NULL)
|
||||
# Access its first element
|
||||
mynameSplited[[1]][1]
|
||||
mynameSplited[[1]]
|
||||
mynameSplited[1]
|
||||
# Index in alphabet of letters in my name
|
||||
indexMyname = which(letters %in% mynameSplited[[1]])
|
||||
# Index in alphabet of letters in my neighbour name
|
||||
neighborname = "Marianne"
|
||||
neighbornameSplited = strsplit(neighborname, NULL)
|
||||
neighbornameSplited[[1]][1]
|
||||
indexNeighborname = which(letters %in% neighbornameSplited[[1]])
|
||||
# Min of average of index
|
||||
mynameAverage = mean(indexMyname)
|
||||
neighborAverage = mean(indexNeighborname)
|
||||
minIndexOfName = min(c(mynameAverage,neighborAverage))
|
||||
|
||||
### 2 DataFrames ###
|
||||
|
||||
### 2.1 Cute Animals
|
||||
# All vowels
|
||||
vowels = c('a','e','i','o','u','y')
|
||||
# All letters
|
||||
letters = letters
|
||||
# Database of 3 columns : alphabet letters, number of each letter, binary variable vowel
|
||||
database = data.frame(letter=letters, index=1:26, isVowels=letters %in% vowels)
|
||||
# Extracting lines corresponding to my name
|
||||
indexMyname = letters %in% strsplit("Titouan", NULL)[[1]]
|
||||
database[indexMyname,"index"]
|
||||
# Examining msleep dataset
|
||||
library(tidyverse)
|
||||
head(msleep)
|
||||
str(msleep)
|
||||
names(msleep)
|
||||
summary(msleep)
|
||||
# Sanity check animals either awake or asleep
|
||||
which(msleep$sleep_total + msleep$awake == 24 )
|
||||
# Animal that sleep the most
|
||||
msleep[which.max(msleep$sleep_total),]
|
||||
# Animals of less than 100g and that sleeps more than half a day
|
||||
nrow(msleep[which(msleep$sleep_total > 12 & msleep$bodywt < 0.1),])
|
||||
# Average brainwt/bodywt ratio (adds a ratio column)
|
||||
msleep$ratio = msleep$brainwt/msleep$bodywt
|
||||
mean(msleep$ratio, na.rm = TRUE)
|
||||
# Animal with the highest ratio
|
||||
msleep[which.max(msleep$ratio),]
|
||||
|
||||
### 2.2 Endangered animals
|
||||
|
||||
# Create a copy and reorder its factors
|
||||
msleep_copy = msleep
|
||||
msleep_copy$conservation = factor(x = msleep_copy$conservation, c("lc","domesticated","cd","nt","vu","en"), ordered = TRUE)
|
||||
# Compare average weight of endangered animals to non-endangered
|
||||
averageWtThreatened = mean(msleep_copy[which(msleep_copy$conservation > "nt"),]$bodywt)
|
||||
averageWtRemaining = mean(msleep_copy[which(msleep_copy$conservation <= "nt"),]$bodywt)
|
||||
|
||||
# Adds a boolean threatened column
|
||||
msleep$threatened = msleep$conservation > "nt"
|
||||
|
||||
|
||||
### 2.3 Functions
|
||||
|
||||
# Create a function taking a name as input and returning corresponding list of letters
|
||||
lettersFromName <- function(name) {
|
||||
return(strsplit(name, NULL)[[1]])
|
||||
}
|
||||
lettersFromName("Titouan")
|
||||
# Fix with empty "" name
|
||||
lettersFromName("")
|
||||
lettersFromNameFixed <- function(name) {
|
||||
if (name == "") {
|
||||
l = NULL
|
||||
}
|
||||
else {
|
||||
l = strsplit(name, NULL)[[1]]
|
||||
}
|
||||
return(l)
|
||||
}
|
||||
lettersFromNameFixed("")
|
||||
# With non-standards characters
|
||||
lettersFromNameFixed("X Æ A-12")
|
||||
# Function returning corresponding genus from animal name
|
||||
genusFromName <- function(name) {
|
||||
genusname <- msleep[tolower(msleep$name) == tolower(name),]$genus
|
||||
if (length(genusname) == 0) {
|
||||
s <- c("I don't know")
|
||||
}
|
||||
else {
|
||||
s <- c("The ",name," is a ", genusname)
|
||||
}
|
||||
#browser()
|
||||
return(paste(s, collapse = ''))
|
||||
}
|
||||
genusFromName("little brown bat")
|
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo1.pdf
Normal file
Binary file not shown.
315
tp-analyse-explorative-master/tp-analyse-explorative/exo2.R
Normal file
|
@ -0,0 +1,315 @@
|
|||
library(tidyverse)
|
||||
library(directlabels)
|
||||
|
||||
####### 1 Plot and Given names #######
|
||||
### Exploring the Pink City
|
||||
# read the table prenoms.csv
|
||||
prenoms <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenoms.csv',sep=";")
|
||||
# Inspect it
|
||||
head(prenoms)
|
||||
# Plot the number of births by year
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
geom_point()
|
||||
# Plot the number of male/female births by year
|
||||
prenoms %>%
|
||||
group_by(Année,Sexe) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
geom_point(aes(color=Sexe))
|
||||
# Is my name in the dataset ?
|
||||
prenoms[tolower(prenoms$Prénom) == tolower("Titouan"),]
|
||||
# Represent the 10 most given names
|
||||
prenoms %>%
|
||||
group_by(Prénom) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
arrange(desc(Naissances)) -> mostGivenNames
|
||||
ggplot(head(mostGivenNames,10),aes(x=Prénom,y=Naissances)) +
|
||||
geom_bar(stat='Identity',aes(fill=Prénom))
|
||||
# Select for each year the top 5 given names by sex and their evolution through the years
|
||||
prenoms[prenoms$Sexe == "M",] %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
|
||||
prenoms[prenoms$Sexe == "F",] %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
|
||||
mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
|
||||
ggplot(mostGivenNamesBySexeAndYear,aes(x=Année,y=Nombre)) +
|
||||
geom_point(aes(color=Prénom)) +
|
||||
facet_wrap("Prénom")
|
||||
# Plot the average numbers of letters by years
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
|
||||
ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
|
||||
geom_line()
|
||||
# Plot the average numbers of vowels/consonants by years
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(
|
||||
Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
|
||||
Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
|
||||
) -> averageNumberOfVowelsAndConsonantsByYear
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
|
||||
geom_point()
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
|
||||
geom_point()
|
||||
# How the number of composed names changes through the years
|
||||
prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
|
||||
group_by(Année) %>%
|
||||
summarise(nb_composedNames=sum(Nombre)) -> composedNamesByYear
|
||||
ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
|
||||
geom_line()
|
||||
# Defining a "hype" criteria and finding the hypest names
|
||||
prenoms %>%
|
||||
group_by(Année) -> namesByYear
|
||||
namesByYear$nbAnnéePrec <- lag(namesByYear$Nombre)
|
||||
namesByYear$difference <- namesByYear$Nombre - namesByYear$nbAnnéePrec
|
||||
|
||||
namesByYear %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = difference, n = 1) -> hypestNameByYear
|
||||
ggplot(hypestNameByYear,aes(x=factor(Année),y=difference)) +
|
||||
geom_text(aes(label=Prénom))
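# Note: lag() above is applied to the raw column, so it runs over the rows in file
# order rather than within each name; "difference" only compares a name with itself
# if the CSV happens to be sorted by name and year. A sketch of the per-name variant,
# assuming that is the intended "hype" measure (not part of the original script):
prenoms %>%
  group_by(Prénom, Sexe) %>%
  arrange(Année, .by_group = TRUE) %>%
  mutate(difference = Nombre - lag(Nombre)) %>%
  group_by(Année) %>%
  slice_max(order_by = difference, n = 1) -> hypestNameByYear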
|
||||
|
||||
### Exploring the Gray City
|
||||
# read the table prenomsParis.csv
|
||||
prenomsParis <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenomsParis.csv',sep=";")
|
||||
prenomsParis <-rename(prenomsParis, Année = Annee)
|
||||
prenomsParis <-rename(prenomsParis, Prénom = Prenoms)
|
||||
|
||||
nameStudy <- function(prenoms) {
|
||||
# Plot the number of births by year
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
ggtitle("Naissances par année") +
|
||||
geom_point() -> p1
|
||||
# Plot the number of male/female births by year
|
||||
prenoms %>%
|
||||
group_by(Année,Sexe) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
ggtitle("Sexe des naissances par année") +
|
||||
geom_point(aes(color=Sexe)) -> p2
|
||||
# Is my name in the dataset ?
|
||||
prenoms[tolower(prenoms$Prénom) == tolower("Titouan"),]
|
||||
# Represent the 10 most given names
|
||||
prenoms %>%
|
||||
group_by(Prénom) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
arrange(desc(Naissances)) -> mostGivenNames
|
||||
ggplot(head(mostGivenNames,10),aes(x=reorder(Prénom,Naissances),y=Naissances)) +
|
||||
ggtitle("Les 10 noms les plus donnés") +
|
||||
geom_bar(stat='Identity',aes(fill=reorder(Prénom,Naissances))) -> p3
|
||||
# Select for each year the top 5 given names by sex and their evolution through the years
|
||||
prenoms[prenoms$Sexe == "M",] %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
|
||||
prenoms[prenoms$Sexe == "F",] %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
|
||||
mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
|
||||
ggplot(mostGivenNamesBySexeAndYear,aes(x=Année,y=Nombre)) +
|
||||
ggtitle("Top 5 des noms donnés, par sexe et par an") +
|
||||
geom_point(aes(color=Prénom)) +
|
||||
facet_wrap("Prénom") -> p4
|
||||
# Plot the average numbers of letters by years
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
|
||||
ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
|
||||
ggtitle("Nombre moyen de lettres dans les prénoms par année") +
|
||||
geom_line() -> p5
|
||||
# Plot the average numbers of vowels/consonants by years
|
||||
prenoms %>%
|
||||
group_by(Année) %>%
|
||||
summarise(
|
||||
Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
|
||||
Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
|
||||
) -> averageNumberOfVowelsAndConsonantsByYear
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
|
||||
ggtitle("Nombre moyen de voyelles dans les prénoms par année") +
|
||||
geom_point() -> p6
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
|
||||
ggtitle("Nombre moyen de consonnes dans les prénoms par année") +
|
||||
geom_point() -> p7
|
||||
# How the number of composed names changes through the years
|
||||
prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
|
||||
group_by(Année) %>%
|
||||
summarise(nb_composedNames=sum(Nombre)) -> composedNamesByYear
|
||||
ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
|
||||
ggtitle("Nombre de prénoms composés par année") +
|
||||
geom_line() -> p8
|
||||
# Defining a "hype" criteria and finding the hypest names
|
||||
prenoms %>%
|
||||
group_by(Année) -> namesByYear
|
||||
namesByYear$nbAnnéePrec <- lag(namesByYear$Nombre)
|
||||
namesByYear$difference <- namesByYear$Nombre - namesByYear$nbAnnéePrec
|
||||
namesByYear %>%
|
||||
group_by(Année) %>%
|
||||
slice_max(order_by = difference, n = 1) -> hypestNameByYear
|
||||
ggplot(hypestNameByYear,aes(x=factor(Année),y=difference)) +
|
||||
ggtitle("Prénom le plus hype par année") +
|
||||
geom_text(aes(label=Prénom)) -> p9
|
||||
return(list(p1,p2,p3,p4,p5,p6,p7,p8,p9))
|
||||
}
|
||||
plotsParis <- nameStudy(prenomsParis)
|
||||
plotsParis
|
||||
|
||||
# A tale of two (or more) cities
|
||||
|
||||
prenoms <- subset( prenoms, select = -Ordre )
|
||||
prenoms$Ville <- "Toulouse"
|
||||
prenomsParis$Ville <- "Paris"
|
||||
allPrenoms <- rbind(prenoms,prenomsParis)
|
||||
allPrenoms %>%
|
||||
group_by(Année,Ville) %>%
|
||||
mutate(NaissancesVilleAnnée=sum(Nombre)) -> allPrenoms
|
||||
|
||||
nameStudyCombined <- function(prenoms) {
|
||||
# Plot the number of births by year
|
||||
prenoms %>%
|
||||
group_by(Année,Ville) %>%
|
||||
summarise(Naissances=sum(Nombre)) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
geom_point(aes(color=Ville)) +
|
||||
geom_line(aes(color=Ville)) +
|
||||
ggtitle("Naissances par année") -> p1
|
||||
|
||||
prenoms %>%
|
||||
group_by(Année,Ville) %>%
|
||||
mutate(NaissancesVilleAnnée=sum(Nombre)) -> prenoms
|
||||
|
||||
# Plot the number of male/female births by year and by cities
|
||||
prenoms %>%
|
||||
group_by(Année,Sexe,Ville) %>%
|
||||
summarise(Naissances=100*sum(Nombre)/NaissancesVilleAnnée) %>%
|
||||
ggplot(aes(x=Année,y=Naissances)) +
|
||||
geom_point(aes(color=Sexe)) +
|
||||
facet_wrap("Ville") +
|
||||
ggtitle("Sexe des naissances par année en %") -> p2
|
||||
|
||||
# Represent the 10 most given names by cities
|
||||
prenoms %>%
|
||||
group_by(Ville) %>%
|
||||
mutate(NaissancesVille = sum(Nombre)) %>%
|
||||
group_by(Prénom,Ville) %>%
|
||||
summarise(Naissances=100*sum(Nombre)/sum(NaissancesVille)) %>%
|
||||
group_by(Ville) %>%
|
||||
slice_max(order_by = Naissances, n = 10) -> mostGivenNames
|
||||
ggplot(mostGivenNames,aes(x=reorder(Prénom,Naissances),y=Naissances)) +
|
||||
ggtitle("Les 10 noms les plus donnés par ville") +
|
||||
geom_bar(stat='Identity',aes(fill=reorder(Prénom,Naissances))) +
|
||||
facet_wrap("Ville") -> p3
|
||||
|
||||
# Select for each year the top 5 given names by sex and their evolution through the years by cities
|
||||
tryCatch(
|
||||
{
|
||||
prenoms[prenoms$Sexe == "M",] %>%
|
||||
group_by(Ville,Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostMaleGivenNamesByYear
|
||||
prenoms[prenoms$Sexe == "F",] %>%
|
||||
group_by(Ville,Année) %>%
|
||||
slice_max(order_by = Nombre, n = 5) -> mostFemaleGivenNamesByYear
|
||||
mostGivenNamesBySexeAndYear <- rbind(mostMaleGivenNamesByYear, mostFemaleGivenNamesByYear)
|
||||
mostGivenNamesBySexeAndYear %>%
|
||||
group_by(Prénom,Ville,Année) %>%
|
||||
mutate(Naissances=100*sum(Nombre)/sum(NaissancesVilleAnnée)) -> mostGivenNamesBySexeAndYearNormalized
|
||||
ggplot(mostGivenNamesBySexeAndYearNormalized,aes(x=Année,y=Naissances)) +
|
||||
ggtitle("Top 5 des noms donnés, par sexe, par an et par ville") +
|
||||
geom_line(aes(color=Prénom)) +
|
||||
geom_point(aes(color=Prénom,shape=Sexe),size=3) +
|
||||
geom_dl(aes(label=Prénom), method=list(dl.combine("first.points")), cex = 0.8) +
|
||||
facet_wrap("Ville")
|
||||
},
|
||||
error=function(e) e
|
||||
) -> p4
|
||||
|
||||
|
||||
# Plot the average numbers of letters by years by cities
|
||||
prenoms %>%
|
||||
group_by(Année,Ville) %>%
|
||||
summarise(Lettres=mean(nchar(Prénom))) -> averageNumberOfLettersByYear
|
||||
ggplot(averageNumberOfLettersByYear,aes(x=Année,y=Lettres)) +
|
||||
ggtitle("Nombre moyen de lettres dans les prénoms par année et par ville") +
|
||||
geom_point(aes(color=Ville)) +
|
||||
geom_line(aes(color=Ville)) -> p5
|
||||
|
||||
# Plot the average numbers of vowels/consonants by years and by cities
|
||||
prenoms %>%
|
||||
group_by(Ville, Année) %>%
|
||||
summarise(
|
||||
Vowels = mean(nchar(gsub("[éèêëàâäïaeiouy]", "", tolower(Prénom)))),
|
||||
Consonants = mean(nchar(gsub("[zrtpqsdfghjklmwxcvbnç]", "", tolower(Prénom))))
|
||||
) -> averageNumberOfVowelsAndConsonantsByYear
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Vowels)) +
|
||||
ggtitle("Nombre moyen de voyelles dans les prénoms par année et par ville") +
|
||||
geom_point(aes(color=Ville)) +
|
||||
geom_line(aes(color=Ville)) -> p6
|
||||
ggplot(averageNumberOfVowelsAndConsonantsByYear, aes(x=Année,y=Consonants)) +
|
||||
ggtitle("Nombre moyen de consonnes dans les prénoms par année et par ville") +
|
||||
geom_point(aes(color=Ville)) +
|
||||
geom_line(aes(color=Ville)) -> p7
|
||||
|
||||
# How the number of composed names changes through the years
|
||||
prenoms[str_detect(prenoms$Prénom, regex("[a-zA-Zéèàï]+-[a-zA-ZéèàÎ]+")),] %>%
|
||||
group_by(Année,Ville) %>%
|
||||
summarise(nb_composedNames=100*sum(Nombre)/NaissancesVilleAnnée) -> composedNamesByYear
|
||||
ggplot(composedNamesByYear,aes(x=Année,y=nb_composedNames)) +
|
||||
ggtitle("Nombre de prénoms composés par année et par ville") +
|
||||
geom_point(aes(color=Ville)) +
|
||||
geom_line(aes(color=Ville)) -> p8
|
||||
|
||||
# Defining a "hype" criteria and finding the hypest names
|
||||
prenoms %>%
|
||||
group_by(Année,Ville) -> namesByYearAndCities
|
||||
namesByYearAndCities$nbAnnéePrec <- lag(namesByYearAndCities$Nombre)
|
||||
namesByYearAndCities$difference <- namesByYearAndCities$Nombre - namesByYearAndCities$nbAnnéePrec
|
||||
namesByYearAndCities %>%
|
||||
group_by(Année,Ville) %>%
|
||||
slice_max(order_by = difference, n = 1) -> hypestNameByYearAndCities
|
||||
ggplot(hypestNameByYearAndCities,aes(x=factor(Année),y=difference/NaissancesVilleAnnée)) +
|
||||
ggtitle("Prénom le plus hype par année et par ville") +
|
||||
geom_text(aes(label=Prénom,color=Prénom)) +
|
||||
facet_wrap("Ville") -> p9
|
||||
|
||||
return(list(p1,p2,p3,p4,p5,p6,p7,p8,p9))
|
||||
}
|
||||
|
||||
combinedParisToulouse <- nameStudyCombined(allPrenoms)
|
||||
combinedParisToulouse
|
||||
|
||||
# The most unshared names
|
||||
allPrenoms %>%
|
||||
group_by(Ville) %>%
|
||||
mutate(NaissancesVille=sum(Nombre)) %>%
|
||||
group_by(Prénom,Ville) %>%
|
||||
mutate(Proportion=sum(Nombre)/NaissancesVille) %>%
|
||||
group_by(Prénom,Ville) -> namesByCities
|
||||
namesByCities$autreVilleProportion <- lag(namesByCities$Proportion)
|
||||
namesByCities$difference <- namesByCities$Proportion - namesByCities$autreVilleProportion
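# The block above stops after computing "difference", and because lag() is applied to
# the raw column, autreVilleProportion usually comes from an adjacent row of the same
# city (the rbind puts all Toulouse rows before the Paris rows) rather than from the
# other city. A sketch of one way to finish the comparison, using only columns already
# present in allPrenoms; the pivot_wider reshaping and the top-10 cut-off are choices
# of this sketch, not part of the original script:
allPrenoms %>%
  group_by(Ville) %>%
  mutate(NaissancesVille = sum(Nombre)) %>%
  group_by(Prénom, Ville) %>%
  summarise(Proportion = sum(Nombre) / first(NaissancesVille), .groups = "drop") %>%
  pivot_wider(names_from = Ville, values_from = Proportion, values_fill = 0) %>%
  mutate(difference = abs(Toulouse - Paris)) %>%
  slice_max(order_by = difference, n = 10)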
|
||||
|
||||
|
||||
### A Tale of many cities
|
||||
# read the table prenomsRennesStrassNantesToul.csv
|
||||
prenomsRennesStrassNantesToul <- read.csv('/home/labourde/Bureau/tp-analyse-explorative/prenomsRennesStrassNantesToul.csv',sep=";")
|
||||
prenomsRennesStrassNantesToul <-rename(prenomsRennesStrassNantesToul, Année = ANNAISS)
|
||||
prenomsRennesStrassNantesToul <-rename(prenomsRennesStrassNantesToul, Ville = LBCOM)
|
||||
prenomsRennesStrassNantesToul <-rename(prenomsRennesStrassNantesToul, Sexe = SEX)
|
||||
prenomsRennesStrassNantesToul <-rename(prenomsRennesStrassNantesToul, Prénom = PRN)
|
||||
prenomsRennesStrassNantesToul <-rename(prenomsRennesStrassNantesToul, Nombre = NRB)
|
||||
prenomsRennesStrassNantesToul[prenomsRennesStrassNantesToul$Ville=="RENNES",] -> a
|
||||
combinedRennesStrassNantesToul <- nameStudyCombined(prenomsRennesStrassNantesToul)
|
||||
combinedRennesStrassNantesToul[1]
|
||||
combinedRennesStrassNantesToul[2]
|
||||
combinedRennesStrassNantesToul[3]
|
||||
combinedRennesStrassNantesToul[4]
|
||||
combinedRennesStrassNantesToul[5]
|
||||
combinedRennesStrassNantesToul[6]
|
||||
combinedRennesStrassNantesToul[7]
|
||||
combinedRennesStrassNantesToul[8]
|
||||
combinedRennesStrassNantesToul[9]
|
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo2.pdf
Normal file
Binary file not shown.
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo3.pdf
Normal file
Binary file not shown.
BIN
tp-analyse-explorative-master/tp-analyse-explorative/exo4.pdf
Normal file
Binary file not shown.
File diff suppressed because it is too large
5251
tp-analyse-explorative-master/tp-analyse-explorative/prenoms.csv
Normal file
File diff suppressed because it is too large
11001
tp-analyse-explorative-master/tp-analyse-explorative/prenomsParis.csv
Normal file
File diff suppressed because it is too large
File diff suppressed because it is too large