library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
I simply opened your text file and deleted everything down to ‘SAMPLE SUMMARY’. This could also be done using code.
Here is some guidance on how to tell R to read only the part of a file that comes after a certain text string.
For reference in the code below, I renamed the data file without the long header to `Alex-data.txt`, and it is in a folder called `data` in the enclosing project.
# Load the header-trimmed export from the project's data folder.
data <- read.csv(file = "./data/Alex-data.txt")
# The first two rows are extra header lines, not observations, so exclude
# them by negative row index.
data_clean <- data[-c(1L, 2L), ]
# Inspect the column names and types of the trimmed data frame.
str(data_clean)
'data.frame': 28543 obs. of 118 variables:
$ DateTime : chr "0:00:16" "0:00:17" "0:00:18" "0:00:19" ...
$ DateTime.1 : chr "1687957801" "1687957802" "1687957803" "1687957804" ...
$ DateTime.2 : chr "1687957800" "1687957801" "1687957802" "1687957803" ...
$ DateTime.3 : chr "2023-06-28T13:10:01" "2023-06-28T13:10:02" "2023-06-28T13:10:03" "2023-06-28T13:10:04" ...
$ DateTime.4 : chr "2023-06-28T09:10:01" "2023-06-28T09:10:02" "2023-06-28T09:10:03" "2023-06-28T09:10:04" ...
$ FilterSample : chr "1.016" "0.000" "0.000" "0.000" ...
$ FilterSample.1: chr "0.508" "0.000" "0.000" "0.000" ...
$ FilterSample.2: chr "0.017" "0.017" "0.017" "0.017" ...
$ FilterSample.3: chr "140.65" "140.65" "140.65" "140.65" ...
$ Battery : chr "099" "099" "099" "099" ...
$ Atmo : chr "23.63" "23.82" "23.98" "24.10" ...
$ Atmo.1 : chr "994.28" "994.29" "994.29" "994.29" ...
$ Atmo.2 : chr "54.63" "55.33" "55.18" "54.96" ...
$ Atmo.3 : chr "1.1600" "1.1591" "1.1585" "1.1579" ...
$ Atmo.4 : chr "159.1" "159.1" "159.1" "159.1" ...
$ GPS : chr "0" "0" "0" "0" ...
$ GPS.1 : chr "-9999.000000" "-9999.000000" "-9999.000000" "-9999.000000" ...
$ GPS.2 : chr "-9999.000000" "-9999.000000" "-9999.000000" "-9999.000000" ...
$ GPS.3 : chr "-9999.0" "-9999.0" "-9999.0" "-9999.0" ...
$ GPS.4 : chr "0" "0" "0" "0" ...
$ GPS.5 : chr "-9999.000000" "-9999.000000" "-9999.000000" "-9999.000000" ...
$ GPS.6 : chr "-9999.0" "-9999.0" "-9999.0" "-9999.0" ...
$ Motion : chr " 0" " -350" " -149" " 102" ...
$ Motion.1 : chr "" "" "" "" ...
$ Motion.2 : chr "" "" "" "" ...
$ Motion.3 : chr "" "" "" "" ...
$ Motion.4 : chr " 0" " -1081" " 778" " 877" ...
$ Motion.5 : chr "" "" "" "" ...
$ Motion.6 : chr "" "" "" "" ...
$ Motion.7 : chr "" "" "" "" ...
$ Motion.8 : chr " 0" " -241" " -197" " -246" ...
$ Motion.9 : chr "" "" "" "" ...
$ Motion.10 : chr "" "" "" "" ...
$ Motion.11 : chr "" "" "" "" ...
$ Motion.12 : chr " 0" " -805" "-24412" " 24911" ...
$ Motion.13 : chr "" "" "" "" ...
$ Motion.14 : chr "" "" "" "" ...
$ Motion.15 : chr "" "" "" "" ...
$ Motion.16 : chr " 0" "-33538" " 17386" "-19171" ...
$ Motion.17 : chr "" "" "" "" ...
$ Motion.18 : chr "" "" "" "" ...
$ Motion.19 : chr "" "" "" "" ...
$ Motion.20 : chr " 0" "-187363" "160282" " 1295" ...
$ Motion.21 : chr "" "" "" "" ...
$ Motion.22 : chr "" "" "" "" ...
$ Motion.23 : chr "" "" "" "" ...
$ Motion.24 : chr "" "" "" "" ...
$ Motion.25 : chr "" "" "" "" ...
$ Motion.26 : chr "nan" "0.0" "0.0" "0.0" ...
$ Motion.27 : chr "nan" "0.0" "0.0" "0.0" ...
$ Motion.28 : chr "nan" "100.0" "0.0" "0.0" ...
$ Motion.29 : chr "nan" "0.0" "100.0" "100.0" ...
$ Motion.30 : chr "nan" "0.0" "0.0" "0.0" ...
$ Motion.31 : chr "nan" "0.0" "0.0" "0.0" ...
$ Motion.32 : chr "0" "0" "0" "0" ...
$ Light : chr "242.7" "181.2" "315.6" "399.4" ...
$ Light.1 : chr "0.39" "0.07" "0.19" "0.12" ...
$ Light.2 : chr "1930.0" "753.0" "1901.0" "2673.0" ...
$ Light.3 : chr "1713.0" "1253.0" "2191.0" "2910.0" ...
$ Light.4 : chr "2174.0" "1583.0" "2514.0" "3576.0" ...
$ Light.5 : chr "48.0" "-9.0" "-23.0" "15.0" ...
$ PMSensor : chr "1" "1" "1" "1" ...
$ PMSensor.1 : chr "75.62" "75.74" "76.08" "76.39" ...
$ PMSensor.2 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.3 : chr "82.48" "82.60" "82.94" "83.25" ...
$ PMSensor.4 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.5 : chr "84.53" "84.65" "84.97" "85.25" ...
$ PMSensor.6 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.7 : chr "85.56" "85.67" "85.98" "86.26" ...
$ PMSensor.8 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.9 : chr "524.57" "525.46" "527.90" "530.14" ...
$ PMSensor.10 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.11 : chr "599.12" "600.12" "602.84" "605.33" ...
$ PMSensor.12 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.13 : chr "602.93" "603.92" "606.62" "609.10" ...
$ PMSensor.14 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.15 : chr "603.61" "604.60" "607.29" "609.77" ...
$ PMSensor.16 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.17 : chr "603.61" "604.60" "607.29" "609.77" ...
$ PMSensor.18 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.19 : chr "0.55" "0.56" "0.55" "0.55" ...
$ PMSensor.20 : chr "0.00" "0.00" "0.00" "0.00" ...
$ PMSensor.21 : chr "0.0007" "0.0007" "0.0007" "0.0007" ...
$ PMSensor.22 : chr "0" "0" "0" "0" ...
$ PMSensor.23 : chr "0" "0" "0" "0" ...
$ PMSensor.24 : chr "0" "0" "0" "0" ...
$ PMSensor.25 : chr "0" "0" "0" "0" ...
$ EngData : chr "23.93" "23.97" "24.00" "24.04" ...
$ EngData.1 : chr "23.53" "23.56" "23.59" "23.59" ...
$ EngData.2 : chr "24.68" "24.68" "24.68" "24.68" ...
$ EngData.3 : chr "0.00" "21.36" "21.35" "21.39" ...
$ EngData.4 : chr "109.20" "200.84" "200.84" "200.85" ...
$ EngData.5 : chr "995.2" "995.2" "995.3" "995.2" ...
$ EngData.6 : chr "571" "571" "571" "571" ...
$ EngData.7 : chr "0" "0" "0" "0" ...
$ EngData.8 : chr "9.194" "0.000" "0.000" "0.000" ...
$ EngData.9 : chr "1.17806" "0.00000" "0.00000" "0.00000" ...
$ EngData.10 : chr "1.342375" "1.342375" "1.342375" "1.342375" ...
$ EngData.11 : chr "52746" "52746" "52746" "52745" ...
[list output truncated]
# Write the trimmed data back to disk as CSV. Row names are omitted so the
# file contains only the data columns.
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(data_clean, "./data/Alex-clean-data.txt", row.names = FALSE)
Writing the cleaned data out and then reading it back in lets R recognize the variable types (more) correctly. It’s also possible to fix this using code, but it’s kind of a pain with all of these variables.
# Re-import the cleaned file so read.csv() infers each column's type from
# the data rows alone.
data_really_clean <- read.csv(file = "./data/Alex-clean-data.txt")
# Check the structure again to see the resulting column types.
str(data_really_clean)
'data.frame': 28543 obs. of 118 variables:
$ DateTime : chr "0:00:16" "0:00:17" "0:00:18" "0:00:19" ...
$ DateTime.1 : int 1687957801 1687957802 1687957803 1687957804 1687957805 1687957806 1687957807 1687957808 1687957809 1687957810 ...
$ DateTime.2 : int 1687957800 1687957801 1687957802 1687957803 1687957804 1687957805 1687957806 1687957807 1687957808 1687957809 ...
$ DateTime.3 : chr "2023-06-28T13:10:01" "2023-06-28T13:10:02" "2023-06-28T13:10:03" "2023-06-28T13:10:04" ...
$ DateTime.4 : chr "2023-06-28T09:10:01" "2023-06-28T09:10:02" "2023-06-28T09:10:03" "2023-06-28T09:10:04" ...
$ FilterSample : num 1.02 0 0 0 0 ...
$ FilterSample.1: num 0.508 0 0 0 0 0 0 0 0 0 ...
$ FilterSample.2: num 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 0.017 ...
$ FilterSample.3: num 141 141 141 141 141 ...
$ Battery : int 99 99 99 99 99 99 99 99 99 99 ...
$ Atmo : num 23.6 23.8 24 24.1 24.2 ...
$ Atmo.1 : num 994 994 994 994 994 ...
$ Atmo.2 : num 54.6 55.3 55.2 55 54.7 ...
$ Atmo.3 : num 1.16 1.16 1.16 1.16 1.16 ...
$ Atmo.4 : num 159 159 159 159 159 ...
$ GPS : int 0 0 0 0 0 0 0 0 0 0 ...
$ GPS.1 : num -9999 -9999 -9999 -9999 -9999 ...
$ GPS.2 : num -9999 -9999 -9999 -9999 -9999 ...
$ GPS.3 : num -9999 -9999 -9999 -9999 -9999 ...
$ GPS.4 : int 0 0 0 0 0 0 0 0 0 0 ...
$ GPS.5 : num -9999 -9999 -9999 -9999 -9999 ...
$ GPS.6 : num -9999 -9999 -9999 -9999 -9999 ...
$ Motion : int 0 -350 -149 102 177 257 225 34 891 1180 ...
$ Motion.1 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.2 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.3 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.4 : int 0 -1081 778 877 705 1030 950 854 -14 95 ...
$ Motion.5 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.6 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.7 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.8 : int 0 -241 -197 -246 -645 -302 91 234 227 400 ...
$ Motion.9 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.10 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.11 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.12 : int 0 -805 -24412 24911 -8890 -205476 57417 80570 -4882 60208 ...
$ Motion.13 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.14 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.15 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.16 : int 0 -33538 17386 -19171 988 9047 22487 3648 -7428 27798 ...
$ Motion.17 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.18 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.19 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.20 : int 0 -187363 160282 1295 140 -30642 -5915 23546 19652 -11803 ...
$ Motion.21 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.22 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.23 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.24 : int NA NA NA NA NA NA NA NA NA NA ...
$ Motion.25 : num NA NA NA NA NA NA NA NA NA NA ...
$ Motion.26 : num NaN 0 0 0 0 0 0 0 0 0 ...
$ Motion.27 : num NaN 0 0 0 0 0 0 0 100 100 ...
$ Motion.28 : num NaN 100 0 0 0 0 0 0 0 0 ...
$ Motion.29 : num NaN 0 100 100 100 100 100 100 0 0 ...
$ Motion.30 : num NaN 0 0 0 0 0 0 0 0 0 ...
$ Motion.31 : num NaN 0 0 0 0 0 0 0 0 0 ...
$ Motion.32 : int 0 0 0 0 0 0 0 0 0 0 ...
$ Light : num 243 181 316 399 893 ...
$ Light.1 : num 0.39 0.07 0.19 0.12 0.04 0.39 0.29 0.59 0.6 0.57 ...
$ Light.2 : num 1930 753 1901 2673 7065 ...
$ Light.3 : num 1713 1253 2191 2910 7079 ...
$ Light.4 : num 2174 1583 2514 3576 8172 ...
$ Light.5 : num 48 -9 -23 15 5 48 35 73 74 70 ...
$ PMSensor : int 1 1 1 1 1 1 1 1 1 1 ...
$ PMSensor.1 : num 75.6 75.7 76.1 76.4 76.8 ...
$ PMSensor.2 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.3 : num 82.5 82.6 82.9 83.2 83.6 ...
$ PMSensor.4 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.5 : num 84.5 84.7 85 85.2 85.6 ...
$ PMSensor.6 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.7 : num 85.6 85.7 86 86.3 86.6 ...
$ PMSensor.8 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.9 : num 525 525 528 530 533 ...
$ PMSensor.10 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.11 : num 599 600 603 605 608 ...
$ PMSensor.12 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.13 : num 603 604 607 609 612 ...
$ PMSensor.14 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.15 : num 604 605 607 610 613 ...
$ PMSensor.16 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.17 : num 604 605 607 610 613 ...
$ PMSensor.18 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.19 : num 0.55 0.56 0.55 0.55 0.55 0.54 0.54 0.55 0.56 0.56 ...
$ PMSensor.20 : num 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.21 : num 7e-04 7e-04 7e-04 7e-04 7e-04 7e-04 7e-04 7e-04 7e-04 7e-04 ...
$ PMSensor.22 : int 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.23 : int 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.24 : int 0 0 0 0 0 0 0 0 0 0 ...
$ PMSensor.25 : int 0 0 0 0 0 0 0 0 0 0 ...
$ EngData : num 23.9 24 24 24 24.1 ...
$ EngData.1 : num 23.5 23.6 23.6 23.6 23.6 ...
$ EngData.2 : num 24.7 24.7 24.7 24.7 24.7 ...
$ EngData.3 : num 0 21.4 21.4 21.4 21.4 ...
$ EngData.4 : num 109 201 201 201 201 ...
$ EngData.5 : num 995 995 995 995 995 ...
$ EngData.6 : int 571 571 571 571 571 571 571 571 571 571 ...
$ EngData.7 : int 0 0 0 0 0 0 0 0 0 0 ...
$ EngData.8 : num 9.19 0 0 0 0 ...
$ EngData.9 : num 1.18 0 0 0 0 ...
$ EngData.10 : num 1.34 1.34 1.34 1.34 1.34 ...
$ EngData.11 : int 52746 52746 52746 52745 52745 52745 52744 52744 52744 52744 ...
[list output truncated]
# Scatter plot of PMSensor.1 (y) against DateTime.1 (x) from the re-imported
# data, drawn with the black-and-white theme.
ggplot(
  data = data_really_clean,
  mapping = aes(x = DateTime.1, y = PMSensor.1)
) +
  geom_point() +
  theme_bw()
Version | Author | Date |
---|---|---|
e751129 | maggiedouglas | 2024-02-04 |
# Print the R version, platform, and attached/loaded packages for this run.
utils::sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.4
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[5] purrr_1.0.2 readr_2.1.5 tidyr_1.3.0 tibble_3.2.1
[9] ggplot2_3.4.4 tidyverse_2.0.0 workflowr_1.7.1
loaded via a namespace (and not attached):
[1] sass_0.4.8 utf8_1.2.4 generics_0.1.3 stringi_1.8.3
[5] hms_1.1.3 digest_0.6.34 magrittr_2.0.3 timechange_0.3.0
[9] evaluate_0.23 grid_4.3.2 fastmap_1.1.1 rprojroot_2.0.4
[13] jsonlite_1.8.8 processx_3.8.3 whisker_0.4.1 ps_1.7.5
[17] promises_1.2.1 httr_1.4.7 fansi_1.0.6 scales_1.3.0
[21] jquerylib_0.1.4 cli_3.6.2 rlang_1.1.3 munsell_0.5.0
[25] withr_3.0.0 cachem_1.0.8 yaml_2.3.8 tools_4.3.2
[29] tzdb_0.4.0 colorspace_2.1-0 httpuv_1.6.13 vctrs_0.6.5
[33] R6_2.5.1 lifecycle_1.0.4 git2r_0.33.0 fs_1.6.3
[37] pkgconfig_2.0.3 callr_3.7.3 pillar_1.9.0 bslib_0.6.1
[41] later_1.3.2 gtable_0.3.4 glue_1.7.0 Rcpp_1.0.12
[45] highr_0.10 xfun_0.41 tidyselect_1.2.0 rstudioapi_0.15.0
[49] knitr_1.45 farver_2.1.1 htmltools_0.5.7 labeling_0.4.3
[53] rmarkdown_2.25 compiler_4.3.2 getPass_0.2-4