> ### A data frame is a two-dimensional structure with rows and columns.
Basically, it is a list of vectors of equal length.
> It is similar to a table of data with a fixed number of rows and columns.
The columns represent the variables (one vector per variable), whereas the rows represent the observations,
each holding one value for every variable.
> # Creating a data frame from vectors
> # Definition of vectors
>
> planets <- c("Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune")
> type <- c("Terrestrial planet", "Terrestrial planet", "Terrestrial planet", "Terrestrial planet", "Gas giant", "Gas giant", "Gas giant", "Gas giant")
> type1 <- c (1, 2, 1, 2, 1, 2, 1, 2)
> diameter <- c(0.382, 0.949, 1, 0.532, 11.209, 9.449, 4.007, 3.883)
> rotation <- c(58.64, -243.02, 1, 1.03, 0.41, 0.43, -0.72, 0.67)
> rings <- c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
>
> # Create the data frame:
> planets_df <- data.frame(planets, type, diameter, rotation, rings)
> planets_df
planets type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
3 Earth Terrestrial planet 1.000 1.00 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
>
> planets_df1 <- data.frame(planets, type1, diameter, rotation, rings)
> planets_df1
planets type1 diameter rotation rings
1 Mercury 1 0.382 58.64 FALSE
2 Venus 2 0.949 -243.02 FALSE
3 Earth 1 1.000 1.00 FALSE
4 Mars 2 0.532 1.03 FALSE
5 Jupiter 1 11.209 0.41 TRUE
6 Saturn 2 9.449 0.43 TRUE
7 Uranus 1 4.007 -0.72 TRUE
8 Neptune 2 3.883 0.67 TRUE
> # Data exploration
> # Looking and exploring the data
> head(planets_df)
planets type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
3 Earth Terrestrial planet 1.000 1.00 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
> tail(planets_df)
planets type diameter rotation rings
3 Earth Terrestrial planet 1.000 1.00 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
> str(planets_df)
'data.frame': 8 obs. of 5 variables:
$ planets : chr "Mercury" "Venus" "Earth" "Mars" ...
$ type : chr "Terrestrial planet" "Terrestrial planet" "Terrestrial planet" "Terrestrial planet" ...
$ diameter: num 0.382 0.949 1 0.532 11.209 ...
$ rotation: num 58.64 -243.02 1 1.03 0.41 ...
$ rings : logi FALSE FALSE FALSE FALSE TRUE TRUE ...
>
>
> planets_df$type <- as.factor(planets_df$type)
>
> str(planets_df)
'data.frame': 8 obs. of 5 variables:
$ planets : chr "Mercury" "Venus" "Earth" "Mars" ...
$ type : Factor w/ 2 levels "Gas giant","Terrestrial planet": 2 2 2 2 1 1 1 1
$ diameter: num 0.382 0.949 1 0.532 11.209 ...
$ rotation: num 58.64 -243.02 1 1.03 0.41 ...
$ rings : logi FALSE FALSE FALSE FALSE TRUE TRUE ...
> str(planets_df1)
'data.frame': 8 obs. of 5 variables:
$ planets : chr "Mercury" "Venus" "Earth" "Mars" ...
$ type1 : num 1 2 1 2 1 2 1 2
$ diameter: num 0.382 0.949 1 0.532 11.209 ...
$ rotation: num 58.64 -243.02 1 1.03 0.41 ...
$ rings : logi FALSE FALSE FALSE FALSE TRUE TRUE ...
>
> # Selection of dataframe elements
>
> #select all elements of a row
> planets_df[1,]
planets type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
>
> #select all elements of a column
> planets_df[ ,2]
[1] Terrestrial planet Terrestrial planet Terrestrial planet Terrestrial planet
[5] Gas giant Gas giant Gas giant Gas giant
Levels: Gas giant Terrestrial planet
>
> # selects the value at the first row and second column
> planets_df[1,2]
[1] Terrestrial planet
Levels: Gas giant Terrestrial planet
> # selects rows 1, 2, 3 and columns 1, 2
> planets_df[1:3, 1:2]
planets type
1 Mercury Terrestrial planet
2 Venus Terrestrial planet
3 Earth Terrestrial planet
>
> # All data from the first three planets
> closest_planets_df <- planets_df[1:3, ]
>
> # All data from the last three planets
> furthest_planets_df <- planets_df[6:8, ]
>
> # Have a look:
> closest_planets_df
planets type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
3 Earth Terrestrial planet 1.000 1.00 FALSE
> furthest_planets_df
planets type diameter rotation rings
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
>
> # Print out diameter of Mercury (row 1, column 3)
> planets_df[1,3]
[1] 0.382
>
> # Print out data for Mars (entire fourth row)
> planets_df[4, ]
planets type diameter rotation rings
4 Mars Terrestrial planet 0.532 1.03 FALSE
>
> # Selection of data frame elements, part 2: selecting columns by name
>
> planets_df[1:3, "type"]
[1] Terrestrial planet Terrestrial planet Terrestrial planet
Levels: Gas giant Terrestrial planet
> planets_df[2,"diameter"]
[1] 0.949
>
> # Select on both rows and columns: the diameter of rows 3 to 8
> planets_df [3:8, "diameter"]
[1] 1.000 0.532 11.209 9.449 4.007 3.883
>
> # Creating a subset from the dataframe
>
> planets_subset_df <- subset(planets_df, subset = diameter < 1)
>
> # Only planets with rings
>
> # Create the rings_vector
> rings_vector <- planets_df [, 5]
> rings_vector
[1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
>
> # Select the information on planets with rings:
> planets_df[rings_vector,]
planets type diameter rotation rings
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
>
> # Sorting and order
> order(planets_df$diameter)
[1] 1 4 2 3 8 7 6 5
>
> # Use order() to create positions
> positions <- order(planets_df$diameter)
> positions
[1] 1 4 2 3 8 7 6 5
>
> # Use positions to sort planets_df
> planets_df[positions, ]
planets type diameter rotation rings
1 Mercury Terrestrial planet 0.382 58.64 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
3 Earth Terrestrial planet 1.000 1.00 FALSE
8 Neptune Gas giant 3.883 0.67 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
5 Jupiter Gas giant 11.209 0.41 TRUE
>
> # Create new "ordered" data frame:
> new_df <- planets_df[positions, ]
>
> # Use order() to create positions in decreasing order (largest diameter first)
> positions <- order(planets_df$diameter, decreasing = TRUE)
> positions
[1] 5 6 7 8 3 2 4 1
>
> # Use positions to sort planets_df
> planets_df[positions, ]
planets type diameter rotation rings
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
3 Earth Terrestrial planet 1.000 1.00 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
1 Mercury Terrestrial planet 0.382 58.64 FALSE
>
> # Create new "ordered" data frame:
> new_df <- planets_df[positions, ]
> new_df
planets type diameter rotation rings
5 Jupiter Gas giant 11.209 0.41 TRUE
6 Saturn Gas giant 9.449 0.43 TRUE
7 Uranus Gas giant 4.007 -0.72 TRUE
8 Neptune Gas giant 3.883 0.67 TRUE
3 Earth Terrestrial planet 1.000 1.00 FALSE
2 Venus Terrestrial planet 0.949 -243.02 FALSE
4 Mars Terrestrial planet 0.532 1.03 FALSE
1 Mercury Terrestrial planet 0.382 58.64 FALSE