Projet BERTA Pauline NUNES Joao

This commit is contained in:
Joao Conceicao Nunes 2020-12-17 11:46:22 +01:00
commit ce6a7448ae
143 changed files with 1990232 additions and 0 deletions

BIN
.RData Normal file

Binary file not shown.

507
.Rhistory Normal file
View file

@ -0,0 +1,507 @@
#
r2 = which(r1 == TRUE)
#
vowels = c('a','e','i','o','u','y')
alphabet = letters
r1 = letters %in% vowels
#Le result est un vecteur de boolean
#avec true quand les deux elements sont
#egaux, et false quand c l'inverse
v1 = seq(6)
#
#r2 = which(r1 == TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
?which
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
?letters
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
?which
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
?strsplit
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
myname_split[0]
myname_split[1]
myname_split[1][0]
myname_split[2
myname_split[2]
myname_split[2].[0]
myname_split[1].[0]
myname_split[[1]][0]
myname_split[[1]][1]
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
print(r6)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP1_2.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
rep(c(0,1),13)
c(1,26,1)
seq(1,26,1)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
install.packages("ggplot2")
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
head
msleep
head?
s
?head
head(msleep)
?names
?head
head(letters)
letters
head(letters, n = 7)
head(msleep, n=2)
?str
str(1:12)
str(freeny)
str(msleep)
?summary
sumamry(letters)
summary(letters)
summary(msleep)
msleep
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
msleep
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
msleep
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
msleep
msleep
?na.omit
msleep
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
summary("pauline")
summary('p','a','u')
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
summary(strsplit(Pauline, NULL))
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
summary(strsplit("Pauline", NULL))
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
summary(strsplit("Pauline", NULL))
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
View(p600)
p600[[1]][1]
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
letters %in% p600
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', encoding = 'ASCII', echo=TRUE)
?substr
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
msleep
which(msleep[[1]] ==Cow)
which(msleep[[1]] =="Cow")
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
msleep
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
which( msleep[[1]] == "w")
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
p777 <- which( msleep[[1]] == "w")
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
install.packages("rapportools")
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
source('C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/TP2_1.R', echo=TRUE)
n
n
n
n
library(tidyverse)
library(ggplot2)
setwd("C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/Projet")
##########################
#Acquisition of the data #
##########################
#Variable containing all the information
tab <- read.table("yob1880.txt",h=FALSE,sep=",")
Year <- matrix(1880,ncol=1,nrow = nrow(tab))
tab <- cbind(Year,tab)
for (i in seq(1881,2019,1)){
#Dynamic name for the source txt file
namesource <- "yob"
namesource <- paste(c(namesource,i),collapse = "")
namesource <- paste (c(namesource,".txt"), collapse="")
# Table used inside the for loop to extrat the informations
tabinter <- read.table(namesource,h=FALSE,sep=",")
#Converting the table into the format "Year,Name,Sex,Number"
Year <- matrix(i,ncol=1,nrow = nrow(tabinter))
tabinter <- cbind(Year,tabinter)
#Adding the new informations to the final table
tab <- rbind(tab,tabinter)
}
final_table <- tab%>%
rename(
Name = V1,
Sex = V2,
Number = V3
)
####################################################################
# Partie 1 - L'influence de Marylin depuis 1880 #
####################################################################
MarilynFemme <- final_table %>%
filter(Name == "Marilyn")%>%
group_by(Year,Number)%>%
filter(Sex=='F')
MarilynHomme <- final_table %>%
filter(Name == "Marilyn")%>%
group_by(Year,Number)%>%
filter(Sex=='M')
ggplot(MarilynFemme,aes(x=Year,y=Number, fill=Number))+
geom_bar(stat = "identity")+
ggtitle("Evolution des naissances avec le prénom Marilyn depuis 1880, femmes")
ggplot(MarilynHomme,aes(x=Year,y=Number, fill=Number))+
geom_bar(stat = "identity")+
ggtitle("Evolution des naissances avec le prénom Marilyn depuis 1880, hommes")
AdolphHomme <- final_table %>%
filter(Name == "Adolph")%>%
group_by(Year,Number)%>%
filter(Sex=='M')
ggplot(AdolphHomme,aes(x=Year,y=Number, fill=Number))+
geom_bar(stat = "identity")+
ggtitle("Evolution des naissances avec le prénom Adolph depuis 1880, hommes")
##################################
# 1.1 - Number of births by year #
##################################
NaissancesAnDepuis1880 <- final_table %>%
group_by(Year)%>%
summarise(
NbrBirths = sum(Number)
)
ggplot(NaissancesAnDepuis1880,aes(x=Year,y=NbrBirths, color= NbrBirths))+
geom_line(size = 3)+
ggtitle("Nombre de naissances par année depuis 1880, sexes confondus")
NaissancesAn6090 <- final_table %>%
group_by(Year,Sex)%>%
filter(Year < 1990)%>%
filter(Year> 1960)%>%
summarise(NbrBirths = sum(Number))
ggplot(NaissancesAn6090,aes(x=Year,y=NbrBirths, color = Sex))+
geom_line(size=5)+
geom_point()+
ggtitle("Nombre de naissances par année, entre 1960 et 1990")+
NaissancesSexeSep <- final_table %>%
group_by(Year,Sex)%>%
summarise(NbrBirths = sum(Number))
ggplot(NaissancesSexeSep,aes(x=Year,y=NbrBirths, color = Sex))+
geom_line(size=5)+
geom_point()+
ggtitle("Nombre de naissances par année, sexe séparés")
NaissancesSexeSep <- final_table %>%
group_by(Year,Sex)%>%
summarise(NbrBirths = sum(Number))
ggplot(NaissancesSexeSep,aes(x=Year,y=NbrBirths, color = Sex))+
geom_line(size=5)+
geom_point()+
ggtitle("Nombre de naissances par année, sexe séparés")
PrenomsHommeDepuis1880 <- final_table %>%
group_by(Name)%>%
filter(Sex == "M")%>%
summarise(
NumberOfBirths = sum(Number)
)%>%
arrange(desc(NumberOfBirths))%>%
top_n(5)
ggplot(PrenomsHommeDepuis1880,aes(x=Name,y=NumberOfBirths))+
geom_bar(stat = "identity")+
ggtitle("Most given male names since 1880")
# Create Data
PrenomsHommeDepuis1880 <- data.frame(
name=c('James','John','Robert','Michael','William','Others'),
value= c(2.89,2.86,2.70,2.44,2.34,86.77)
)
# Basic piechart
ggplot(data, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
# Create Data
PrenomsHommeDepuis1880 <- data.frame(
name=c('James','John','Robert','Michael','William','Others'),
value= c(2.89,2.86,2.70,2.44,2.34,86.77)
)
# Basic piechart
ggplot(data, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
# Create Data
PrenomsHommeDepuis1880 <- data.frame(
name=c('James','John','Robert','Michael','William','Others'),
value= c(2.89,2.86,2.70,2.44,2.34,86.77)
)
# Basic piechart
ggplot(PrenomsHommeDepuis1880, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
PrenomsFemmesDepuis1880 <- final_table %>%
group_by(Name)%>%
filter(Sex == "F")%>%
summarise(
NumberOfBirths = sum(Number)
)%>%
arrange(desc(NumberOfBirths))%>%
top_n(5)
ggplot(PrenomsFemmesDepuis1880,aes(x=Name,y=NumberOfBirths))+
geom_bar(stat = "identity")+
ggtitle("Most given female names since 1880")
name= c('Mary','Elizabeth','Patricia','Jennifer','Linda','Others')
value= c(2.35,0.94,0.89,0.84,0.83,94.15)
# Create Data
PrenonesFemmes1880 <- data.frame(name,value)
# Basic piechart
ggplot(PrenonesFemmes1880, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
PrenomsHommeDepuis2000 <- final_table %>%
group_by(Name)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)%>%
arrange(desc(NumberOfBirths))%>%
top_n(5)
ggplot(PrenomsHommeDepuis2000,aes(x=Name,y=NumberOfBirths))+
geom_bar(stat = "identity")+
ggtitle("Most given male names since 2000")
NbrMaleBirths2000 <- final_table %>%
group_by(Sex)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)
top5MaleNames2000 <- tab233%>% summarise(
N=sum(NumberOfBirths)
)
OtherNames2000 <- (NbrMaleBirths2000[2] - (top5MaleNames2000[1]))
# Create Data
PrenomsHomme2000 <- data.frame(
name=c('Jacob','Michael','Ethan','William','Mathew','Others'),
value= c(402290,363299,335423,333255,317778,OtherNames2000)
)
# Basic piechart
ggplot(PrenomsHomme2000, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
NbrMaleBirths2000 <- final_table %>%
group_by(Sex)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)
top5MaleNames2000 <- tab233%>% summarise(
N=sum(NumberOfBirths)
)
OtherMaleNames2000 <- (NbrMaleBirths2000[2] - (top5MaleNames2000[1]))
# Create Data
PrenomsHomme2000 <- data.frame(
name=c('Jacob','Michael','Ethan','William','Mathew','Others'),
value= c(402290,363299,335423,333255,317778,OtherMaleNames2000)
)
NbrMaleBirths2000 <- final_table %>%
group_by(Sex)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)
top5MaleNames2000 <- tab233%>% summarise(
N=sum(NumberOfBirths)
)
OtherMaleNames2000 <- (NbrMaleBirths2000[2] - (top5MaleNames2000[1]))
#########
PrenomsHommeDepuis2000 <- final_table %>%
group_by(Name)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)%>%
arrange(desc(NumberOfBirths))%>%
top_n(5)
ggplot(PrenomsHommeDepuis2000,aes(x=Name,y=NumberOfBirths))+
geom_bar(stat = "identity")+
ggtitle("Most given male names since 2000")
NbrMaleBirths2000 <- final_table %>%
group_by(Sex)%>%
filter(Sex == "M")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)
top5MaleNames2000 <- PrenomsHommeDepuis2000%>% summarise(
N=sum(NumberOfBirths)
)
OtherMaleNames2000 <- (NbrMaleBirths2000[2] - (top5MaleNames2000[1]))
# Create Data
PrenomsHomme2000 <- data.frame(
name=c('Jacob','Michael','Ethan','William','Mathew','Others'),
value= c(402290,363299,335423,333255,317778,OtherMaleNames2000)
)
# Basic piechart
ggplot(PrenomsHomme2000, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
#########
PrenomsFemmeDepuis2000 <- final_table %>%
group_by(Name)%>%
filter(Sex == "F")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)%>%
arrange(desc(NumberOfBirths))%>%
top_n(5)
ggplot(PrenomsFemmeDepuis2000,aes(x=Name,y=NumberOfBirths))+
geom_bar(stat = "identity")+
ggtitle("Most given female names since 2000")
NbrFemaleBirths2000 <- final_table %>%
group_by(Sex)%>%
filter(Sex == "F")%>%
filter(Year>2000)%>%
summarise(
NumberOfBirths = sum(Number)
)
top5FemaleNames2000 <- PrenomsFemmeDepuis2000%>% summarise(
N=sum(NumberOfBirths)
)
OtherFemaleNames2000 <- (NbrMaleBirths2000[2] - (top5FemaleNames2000[1]))
name= c('Emma','Olivia','Emily','Isabella','Sophia','Others')
value= c(363402,327356,315202,313471,293494,OtherFemaleNames2000)
# Create Data
PrenomsFemme2000 <- data.frame(name,value)
# Basic piechart
ggplot(PrenomsFemme2000, aes(x="", y=value, fill=name)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0)+
theme_void()
EvolutionJames <- final_table %>%
group_by(Year)%>%
filter(Name == "James")%>%
summarise(
NumberOfBirths = sum(Number)
)
ggplot(EvolutionJames,aes(x=Year,y=NumberOfBirths))+
geom_line(size=5)+
geom_point()+
ggtitle("The evolution of the name James since 1880")
# The evolution of the name Mary since 1880 #
##############################################
EvolutionMary <- final_table %>%
group_by(Year)%>%
filter(Name == "Mary")%>%
summarise(
NumberOfBirths = sum(Number)
)
ggplot(EvolutionMary,aes(x=Year,y=NumberOfBirths))+
geom_line(size=5)+
geom_point()+
ggtitle("The evolution of the name Mary since 1880")

324
script.R Normal file
View file

@ -0,0 +1,324 @@
library(tidyverse)
library(ggplot2)
setwd("C:/Users/nunes/OneDrive/Bureau/WorkDay/INSA/Big_Data/Projet")
# ---------------------------------------------------------------------------
# Data acquisition
# ---------------------------------------------------------------------------

# Read the file "yob<year>.txt" from the working directory and prepend a Year
# column. The files have no header line and are comma separated, so
# read.table() names their three columns V1, V2 and V3.
read_year <- function(year) {
  births <- read.table(paste0("yob", year, ".txt"), header = FALSE, sep = ",")
  # rep() keeps this working even if a file yields zero rows.
  cbind(Year = rep(year, nrow(births)), births)
}

# Years covered by the input files. seq() with a numeric step yields doubles,
# the same type the Year column had when it was filled from the loop counter.
years <- seq(1880, 2019, 1)

# Read every file first and bind once at the end. Calling rbind() inside a
# loop re-copies the whole accumulated table on every iteration.
tab <- do.call(rbind, lapply(years, read_year))

# Table in the format "Year, Name, Sex, Number" used by every later section.
final_table <- tab %>%
  rename(
    Name = V1,
    Sex = V2,
    Number = V3
  )
# ---------------------------------------------------------------------------
# Part 1 - The influence of Marilyn since 1880
# ---------------------------------------------------------------------------

# Rows for the first name "Marilyn", one table per sex. The result stays
# grouped by Year and Number.
MarilynFemme <- final_table %>%
  filter(Name == "Marilyn", Sex == "F") %>%
  group_by(Year, Number)

MarilynHomme <- final_table %>%
  filter(Name == "Marilyn", Sex == "M") %>%
  group_by(Year, Number)

# One bar per year; bar height and fill colour both follow Number.
ggplot(MarilynFemme, aes(x = Year, y = Number, fill = Number)) +
  geom_col() +
  ggtitle("Evolution des naissances avec le prénom Marilyn depuis 1880, femmes")

ggplot(MarilynHomme, aes(x = Year, y = Number, fill = Number)) +
  geom_col() +
  ggtitle("Evolution des naissances avec le prénom Marilyn depuis 1880, hommes")

# Same view for the male first name "Adolph".
AdolphHomme <- final_table %>%
  filter(Name == "Adolph", Sex == "M") %>%
  group_by(Year, Number)

ggplot(AdolphHomme, aes(x = Year, y = Number, fill = Number)) +
  geom_col() +
  ggtitle("Evolution des naissances avec le prénom Adolph depuis 1880, hommes")
# ---------------------------------------------------------------------------
# Part 2 - Number of births since 1880
# ---------------------------------------------------------------------------

# 1.1 - Number of births by year
# Sum of Number over all names and both sexes, one row per Year.
NaissancesAnDepuis1880 <- summarise(
  group_by(final_table, Year),
  NbrBirths = sum(Number)
)

# Line chart of the yearly totals; the line colour also follows the total.
ggplot(
  NaissancesAnDepuis1880,
  aes(x = Year, y = NbrBirths, color = NbrBirths)
) +
  geom_line(size = 3) +
  ggtitle("Nombre de naissances par année depuis 1880, sexes confondus")
# Births per year and sex for the years strictly between 1960 and 1990.
# Both comparisons are exclusive, so the rows kept are 1961 to 1989.
# NOTE(review): the plot title reads "entre 1960 et 1990"; confirm whether
# the two end years are meant to be included.
NaissancesAn6090 <- final_table %>%
  group_by(Year, Sex) %>%
  filter(Year < 1990) %>%
  filter(Year > 1960) %>%
  summarise(NbrBirths = sum(Number))

# The plot expression must end with ggtitle(). A trailing "+" here would make
# the parser treat the following assignment as part of the plot expression,
# which is not a valid assignment target and stops the script.
ggplot(NaissancesAn6090, aes(x = Year, y = NbrBirths, color = Sex)) +
  geom_line(size = 5) +
  geom_point() +
  ggtitle("Nombre de naissances par année, entre 1960 et 1990")

# Births per year and sex over the whole data set.
NaissancesSexeSep <- final_table %>%
  group_by(Year, Sex) %>%
  summarise(NbrBirths = sum(Number))

ggplot(NaissancesSexeSep, aes(x = Year, y = NbrBirths, color = Sex)) +
  geom_line(size = 5) +
  geom_point() +
  ggtitle("Nombre de naissances par année, sexe séparés")
# ---------------------------------------------------------------------------
# Part 3 - The most common first names
# ---------------------------------------------------------------------------

# --- Since 1880: men ---

# Total births per male name over all years, keeping the five largest totals.
PrenomsHommeDepuis1880 <- final_table %>%
  filter(Sex == "M") %>%
  group_by(Name) %>%
  summarise(NumberOfBirths = sum(Number)) %>%
  arrange(desc(NumberOfBirths)) %>%
  top_n(5)

ggplot(PrenomsHommeDepuis1880, aes(x = Name, y = NumberOfBirths)) +
  geom_col() +
  ggtitle("Most given male names since 1880")

# Pie-chart data. This assignment replaces the table computed above with
# hand-entered values.
# NOTE(review): the values sum to 100, so they are presumably percentages of
# all male births; they are not computed by this script - confirm the source.
PrenomsHommeDepuis1880 <- data.frame(
  name = c("James", "John", "Robert", "Michael", "William", "Others"),
  value = c(2.89, 2.86, 2.70, 2.44, 2.34, 86.77)
)

# A single stacked bar wrapped onto polar coordinates gives the pie.
ggplot(PrenomsHommeDepuis1880, aes(x = "", y = value, fill = name)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  theme_void()
# --- Since 1880: women ---

# Total births per female name over all years, keeping the five largest totals.
PrenomsFemmesDepuis1880 <- final_table %>%
  filter(Sex == "F") %>%
  group_by(Name) %>%
  summarise(NumberOfBirths = sum(Number)) %>%
  arrange(desc(NumberOfBirths)) %>%
  top_n(5)

ggplot(PrenomsFemmesDepuis1880, aes(x = Name, y = NumberOfBirths)) +
  geom_col() +
  ggtitle("Most given female names since 1880")

# Pie-chart data, entered by hand into the top-level variables name and value.
# NOTE(review): the values sum to 100, so they are presumably percentages of
# all female births; they are not computed by this script - confirm the source.
name <- c("Mary", "Elizabeth", "Patricia", "Jennifer", "Linda", "Others")
value <- c(2.35, 0.94, 0.89, 0.84, 0.83, 94.15)
PrenonesFemmes1880 <- data.frame(name, value)

# A single stacked bar wrapped onto polar coordinates gives the pie.
ggplot(PrenonesFemmes1880, aes(x = "", y = value, fill = name)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  theme_void()
# --- Years after 2000: men ---
# NOTE(review): the filters below use Year > 2000, so the year 2000 itself is
# excluded although the titles say "since 2000" - confirm the intended range.

# Total births per male name after 2000, keeping the five largest totals.
PrenomsHommeDepuis2000 <- final_table %>%
  group_by(Name) %>%
  filter(Sex == "M") %>%
  filter(Year > 2000) %>%
  summarise(
    NumberOfBirths = sum(Number)
  ) %>%
  arrange(desc(NumberOfBirths)) %>%
  top_n(5)

ggplot(PrenomsHommeDepuis2000, aes(x = Name, y = NumberOfBirths)) +
  geom_bar(stat = "identity") +
  ggtitle("Most given male names since 2000")

# Total number of male births after 2000 (one row: Sex, NumberOfBirths).
NbrMaleBirths2000 <- final_table %>%
  group_by(Sex) %>%
  filter(Sex == "M") %>%
  filter(Year > 2000) %>%
  summarise(
    NumberOfBirths = sum(Number)
  )

# Births accounted for by the top names (one row, one column N).
top5MaleNames2000 <- PrenomsHommeDepuis2000 %>%
  summarise(
    N = sum(NumberOfBirths)
  )

# Births carrying any other name. The columns are extracted with [[ ]] so the
# result is a plain number; subtracting the one-column tables themselves gives
# a data frame, which cannot be combined into a numeric `value` vector below.
OtherMaleNames2000 <-
  NbrMaleBirths2000[["NumberOfBirths"]] - top5MaleNames2000[["N"]]

# Pie-chart data built from the table computed above instead of hand-copied
# names and counts, so labels and values always match the data.
PrenomsHomme2000 <- data.frame(
  name = c(as.character(PrenomsHommeDepuis2000[["Name"]]), "Others"),
  value = c(PrenomsHommeDepuis2000[["NumberOfBirths"]], OtherMaleNames2000)
)

# A single stacked bar wrapped onto polar coordinates gives the pie.
ggplot(PrenomsHomme2000, aes(x = "", y = value, fill = name)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  theme_void()
# --- Years after 2000: women ---
# NOTE(review): the filters below use Year > 2000, so the year 2000 itself is
# excluded although the titles say "since 2000" - confirm the intended range.

# Total births per female name after 2000, keeping the five largest totals.
PrenomsFemmeDepuis2000 <- final_table %>%
  group_by(Name) %>%
  filter(Sex == "F") %>%
  filter(Year > 2000) %>%
  summarise(
    NumberOfBirths = sum(Number)
  ) %>%
  arrange(desc(NumberOfBirths)) %>%
  top_n(5)

ggplot(PrenomsFemmeDepuis2000, aes(x = Name, y = NumberOfBirths)) +
  geom_bar(stat = "identity") +
  ggtitle("Most given female names since 2000")

# Total number of female births after 2000 (one row: Sex, NumberOfBirths).
NbrFemaleBirths2000 <- final_table %>%
  group_by(Sex) %>%
  filter(Sex == "F") %>%
  filter(Year > 2000) %>%
  summarise(
    NumberOfBirths = sum(Number)
  )

# Births accounted for by the top names (one row, one column N).
top5FemaleNames2000 <- PrenomsFemmeDepuis2000 %>%
  summarise(
    N = sum(NumberOfBirths)
  )

# Births carrying any other name. This must start from the FEMALE total
# computed just above (the male total was used here by mistake). The columns
# are extracted with [[ ]] so the result is a plain number, not a data frame.
OtherFemaleNames2000 <-
  NbrFemaleBirths2000[["NumberOfBirths"]] - top5FemaleNames2000[["N"]]

# Pie-chart data built from the table computed above instead of hand-copied
# names and counts. The top-level variables name and value are still assigned.
name <- c(as.character(PrenomsFemmeDepuis2000[["Name"]]), "Others")
value <- c(PrenomsFemmeDepuis2000[["NumberOfBirths"]], OtherFemaleNames2000)
PrenomsFemme2000 <- data.frame(name, value)

# A single stacked bar wrapped onto polar coordinates gives the pie.
ggplot(PrenomsFemme2000, aes(x = "", y = value, fill = name)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  theme_void()
# ---------------------------------------------------------------------------
# The evolution of the name James since 1880
# ---------------------------------------------------------------------------

# Births named "James" per year; rows of both sexes are summed together.
EvolutionJames <- final_table %>%
  filter(Name == "James") %>%
  group_by(Year) %>%
  summarise(NumberOfBirths = sum(Number))

ggplot(EvolutionJames, aes(x = Year, y = NumberOfBirths)) +
  geom_line(size = 5) +
  geom_point() +
  ggtitle("The evolution of the name James since 1880")
# ---------------------------------------------------------------------------
# The evolution of the name Mary since 1880
# ---------------------------------------------------------------------------

# Births named "Mary" per year; rows of both sexes are summed together.
EvolutionMary <- final_table %>%
  filter(Name == "Mary") %>%
  group_by(Year) %>%
  summarise(NumberOfBirths = sum(Number))

ggplot(EvolutionMary, aes(x = Year, y = NumberOfBirths)) +
  geom_line(size = 5) +
  geom_point() +
  ggtitle("The evolution of the name Mary since 1880")

2000
yob1880.txt Normal file

File diff suppressed because it is too large Load diff

1935
yob1881.txt Normal file

File diff suppressed because it is too large Load diff

2127
yob1882.txt Normal file

File diff suppressed because it is too large Load diff

2084
yob1883.txt Normal file

File diff suppressed because it is too large Load diff

2297
yob1884.txt Normal file

File diff suppressed because it is too large Load diff

2294
yob1885.txt Normal file

File diff suppressed because it is too large Load diff

2392
yob1886.txt Normal file

File diff suppressed because it is too large Load diff

2373
yob1887.txt Normal file

File diff suppressed because it is too large Load diff

2651
yob1888.txt Normal file

File diff suppressed because it is too large Load diff

2590
yob1889.txt Normal file

File diff suppressed because it is too large Load diff

2695
yob1890.txt Normal file

File diff suppressed because it is too large Load diff

2660
yob1891.txt Normal file

File diff suppressed because it is too large Load diff

2921
yob1892.txt Normal file

File diff suppressed because it is too large Load diff

2831
yob1893.txt Normal file

File diff suppressed because it is too large Load diff

2941
yob1894.txt Normal file

File diff suppressed because it is too large Load diff

3049
yob1895.txt Normal file

File diff suppressed because it is too large Load diff

3091
yob1896.txt Normal file

File diff suppressed because it is too large Load diff

3028
yob1897.txt Normal file

File diff suppressed because it is too large Load diff

3264
yob1898.txt Normal file

File diff suppressed because it is too large Load diff

3042
yob1899.txt Normal file

File diff suppressed because it is too large Load diff

3730
yob1900.txt Normal file

File diff suppressed because it is too large Load diff

3153
yob1901.txt Normal file

File diff suppressed because it is too large Load diff

3362
yob1902.txt Normal file

File diff suppressed because it is too large Load diff

3389
yob1903.txt Normal file

File diff suppressed because it is too large Load diff

3560
yob1904.txt Normal file

File diff suppressed because it is too large Load diff

3655
yob1905.txt Normal file

File diff suppressed because it is too large Load diff

3633
yob1906.txt Normal file

File diff suppressed because it is too large Load diff

3948
yob1907.txt Normal file

File diff suppressed because it is too large Load diff

4018
yob1908.txt Normal file

File diff suppressed because it is too large Load diff

4227
yob1909.txt Normal file

File diff suppressed because it is too large Load diff

4629
yob1910.txt Normal file

File diff suppressed because it is too large Load diff

4867
yob1911.txt Normal file

File diff suppressed because it is too large Load diff

6351
yob1912.txt Normal file

File diff suppressed because it is too large Load diff

6968
yob1913.txt Normal file

File diff suppressed because it is too large Load diff

7965
yob1914.txt Normal file

File diff suppressed because it is too large Load diff

9357
yob1915.txt Normal file

File diff suppressed because it is too large Load diff

9696
yob1916.txt Normal file

File diff suppressed because it is too large Load diff

9913
yob1917.txt Normal file

File diff suppressed because it is too large Load diff

10398
yob1918.txt Normal file

File diff suppressed because it is too large Load diff

10369
yob1919.txt Normal file

File diff suppressed because it is too large Load diff

10755
yob1920.txt Normal file

File diff suppressed because it is too large Load diff

10857
yob1921.txt Normal file

File diff suppressed because it is too large Load diff

10756
yob1922.txt Normal file

File diff suppressed because it is too large Load diff

10643
yob1923.txt Normal file

File diff suppressed because it is too large Load diff

10871
yob1924.txt Normal file

File diff suppressed because it is too large Load diff

10638
yob1925.txt Normal file

File diff suppressed because it is too large Load diff

10458
yob1926.txt Normal file

File diff suppressed because it is too large Load diff

10406
yob1927.txt Normal file

File diff suppressed because it is too large Load diff

10159
yob1928.txt Normal file

File diff suppressed because it is too large Load diff

9822
yob1929.txt Normal file

File diff suppressed because it is too large Load diff

9791
yob1930.txt Normal file

File diff suppressed because it is too large Load diff

9298
yob1931.txt Normal file

File diff suppressed because it is too large Load diff

9380
yob1932.txt Normal file

File diff suppressed because it is too large Load diff

9012
yob1933.txt Normal file

File diff suppressed because it is too large Load diff

9180
yob1934.txt Normal file

File diff suppressed because it is too large Load diff

9038
yob1935.txt Normal file

File diff suppressed because it is too large Load diff

8893
yob1936.txt Normal file

File diff suppressed because it is too large Load diff

8946
yob1937.txt Normal file

File diff suppressed because it is too large Load diff

9031
yob1938.txt Normal file

File diff suppressed because it is too large Load diff

8918
yob1939.txt Normal file

File diff suppressed because it is too large Load diff

8961
yob1940.txt Normal file

File diff suppressed because it is too large Load diff

9086
yob1941.txt Normal file

File diff suppressed because it is too large Load diff

9424
yob1942.txt Normal file

File diff suppressed because it is too large Load diff

9408
yob1943.txt Normal file

File diff suppressed because it is too large Load diff

9152
yob1944.txt Normal file

File diff suppressed because it is too large Load diff

9025
yob1945.txt Normal file

File diff suppressed because it is too large Load diff

9706
yob1946.txt Normal file

File diff suppressed because it is too large Load diff

10371
yob1947.txt Normal file

File diff suppressed because it is too large Load diff

10241
yob1948.txt Normal file

File diff suppressed because it is too large Load diff

10270
yob1949.txt Normal file

File diff suppressed because it is too large Load diff

10305
yob1950.txt Normal file

File diff suppressed because it is too large Load diff

10463
yob1951.txt Normal file

File diff suppressed because it is too large Load diff

10645
yob1952.txt Normal file

File diff suppressed because it is too large Load diff

10837
yob1953.txt Normal file

File diff suppressed because it is too large Load diff

10981
yob1954.txt Normal file

File diff suppressed because it is too large Load diff

11122
yob1955.txt Normal file

File diff suppressed because it is too large Load diff

11339
yob1956.txt Normal file

File diff suppressed because it is too large Load diff

11568
yob1957.txt Normal file

File diff suppressed because it is too large Load diff

11526
yob1958.txt Normal file

File diff suppressed because it is too large Load diff

11768
yob1959.txt Normal file

File diff suppressed because it is too large Load diff

11925
yob1960.txt Normal file

File diff suppressed because it is too large Load diff

12180
yob1961.txt Normal file

File diff suppressed because it is too large Load diff

12211
yob1962.txt Normal file

File diff suppressed because it is too large Load diff

12285
yob1963.txt Normal file

File diff suppressed because it is too large Load diff

12398
yob1964.txt Normal file

File diff suppressed because it is too large Load diff

11953
yob1965.txt Normal file

File diff suppressed because it is too large Load diff

12155
yob1966.txt Normal file

File diff suppressed because it is too large Load diff

12400
yob1967.txt Normal file

File diff suppressed because it is too large Load diff

12938
yob1968.txt Normal file

File diff suppressed because it is too large Load diff

13751
yob1969.txt Normal file

File diff suppressed because it is too large Load diff

14778
yob1970.txt Normal file

File diff suppressed because it is too large Load diff

15297
yob1971.txt Normal file

File diff suppressed because it is too large Load diff

15415
yob1972.txt Normal file

File diff suppressed because it is too large Load diff

15683
yob1973.txt Normal file

File diff suppressed because it is too large Load diff

16249
yob1974.txt Normal file

File diff suppressed because it is too large Load diff

16946
yob1975.txt Normal file

File diff suppressed because it is too large Load diff

17395
yob1976.txt Normal file

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more