National parcel number (state-county FIPS prefix + local parcel number)
siteadd
Full site address
sunit
Site address unit/suite/apartment number
scity
Site address city
szip
Site address zip code
mailadd
Full mailing address
ownname
Full owner name (primary surface owner)
ownfrst
Owner first name
ownlast
Owner last name
parval
Total parcel value (improvval + landval), in dollars
parusedesc
Tax parcel use description (e.g., residential, commercial, agriculture)
improvval
Value of improvements on the parcel, in dollars
landval
Value of land on the parcel, in dollars
structyear
Year the primary building was built
struct
Structure indicator: Y = yes, N = no, U = unknown
multistruc
Multiple structures indicator: Y = yes, N = no, U = unknown
structno
Number of structures on the parcel
In [6]:
Show / hide code
# List the distinct, non-missing county FIPS codes found in the parcels layer.
# Reads `gpkg_path` and `parcels_layer`; leaves the codes, cast to text and
# sorted by the query, in `cntys`.

# Open connection just for this query
con <- dbConnect(RSQLite::SQLite(), gpkg_path)

sql <- sprintf(
  '
  SELECT DISTINCT CAST(cntyfips AS TEXT) AS cntyfips
  FROM "%s"
  WHERE cntyfips IS NOT NULL
  ORDER BY cntyfips
  ',
  parcels_layer
)

# Close the connection even when the query fails; previously an error in
# dbGetQuery() skipped dbDisconnect() and left the connection open.
cntys <- tryCatch(
  dbGetQuery(con, sql)$cntyfips,
  finally = dbDisconnect(con)
)

# Drop any value that is still NA or an empty string after the cast to text.
cntys <- cntys[!is.na(cntys) & nzchar(cntys)]
In [7]:
Show / hide code
# Build the study-area parcel subset in `out_gpkg` (layer `out_layer`), unless
# that file already exists. Parcels are read from `gpkg_path` one county at a
# time (codes in `cntys`), restricted to the study area, and appended to the
# output layer.
#
# Uncomment the next line to force a rebuild of the cached output:
# if (file.exists(out_gpkg)) file.remove(out_gpkg)

if (!file.exists(out_gpkg)) {
  # This connection is used only to quote the county code as an SQL string;
  # sf::st_read() opens the GeoPackage itself.
  con <- DBI::dbConnect(RSQLite::SQLite(), gpkg_path)

  # on.exit() is only meaningful inside a function, so cleanup is tied to this
  # block with tryCatch(finally = ...) instead.
  tryCatch(
    {
      cols_sql <- paste(sprintf('"%s"', cols_keep), collapse = ", ")
      wrote_any <- FALSE
      t0 <- Sys.time()

      for (i in seq_along(cntys)) {
        # Named `cnty` rather than `c` so base::c is not masked.
        cnty <- cntys[[i]]

        # Progress message on every fifth county.
        if (i %% 5 == 0) {
          message(sprintf(
            "[%d/%d] county=%s elapsed=%.1f min",
            i, length(cntys), cnty,
            as.numeric(difftime(Sys.time(), t0, units = "mins"))
          ))
        }

        cnty_q <- DBI::dbQuoteString(con, cnty)
        sql <- sprintf(
          'SELECT %s FROM "%s" WHERE CAST(cntyfips AS TEXT) = %s',
          cols_sql, parcels_layer, cnty_q
        )

        x <- sf::st_read(gpkg_path, query = sql, quiet = TRUE)
        if (nrow(x) == 0) next

        # cheap spatial restrictions
        # NOTE(review): st_crop() clips geometries to the rectangle rather than
        # only selecting features; confirm clipping at the bbox edge is
        # acceptable for parcels that straddle it.
        x <- sf::st_crop(x, study_bbox)
        if (nrow(x) == 0) next

        x_in <- sf::st_filter(x, study_union, .predicate = sf::st_intersects)
        if (nrow(x_in) == 0) next

        # The first successful write creates the layer; later ones append.
        sf::st_write(
          x_in, out_gpkg,
          layer = out_layer,
          append = wrote_any,
          quiet = TRUE
        )
        wrote_any <- TRUE

        # Release the per-county objects before reading the next county.
        rm(x, x_in)
        gc()
      }

      if (!wrote_any) stop("No parcels were written—check CRS/study area.")
      message("Wrote: ", out_gpkg)
    },
    error = function(e) {
      # A partially written file would pass the file.exists() check on the
      # next run and be taken for a complete result, so discard it. The file
      # can only have been created by this run.
      if (file.exists(out_gpkg)) file.remove(out_gpkg)
      stop(e)
    },
    finally = DBI::dbDisconnect(con)
  )
} else {
  message("Already exists: ", out_gpkg)
}
# Restore the objects saved in the duplicates .rdata file under `out_dir` into
# the current environment. `dupes_full`, used below, presumably comes from
# this file -- TODO confirm.
load(file.path(out_dir, "duplicate_parcels_study_area.rdata"))

# Interactive, filterable table of the duplicated parcels.
# NOTE(review): escape = FALSE renders cell contents as raw HTML; confirm that
# `dupes_full` holds only intended markup.
dupes_full |>
  datatable(
    caption = "Duplicate parcels (by cntyfips + parno) in study area",
    filter = "top",
    escape = FALSE,
    options = list(pageLength = 10, scrollX = TRUE)
  )
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
The study area contains 4,273,305 parcels in total, of which 22,379 have duplicate records (same cntyfips + parno).
In [13]:
Show / hide code
# Tabulate how many rows of `dupes_full` fall at each duplicate count `n`,
# ordered from the largest `n` down, and show the result as a paged table.
dplyr::count(dupes_full, n, name = "num_parcels") |>
  dplyr::arrange(dplyr::desc(n)) |>
  datatable(
    caption = "Distribution of duplicate counts",
    colnames = c("Duplicate Count (n)", "Number of Parcels"),
    options = list(pageLength = 10)
  )
In [14]:
Show / hide code
# Histogram of the duplicate count `n` in `dupes_full`, one bin per unit of
# `n`. Axis ticks are placed at every integer from 2 up to the largest
# observed `n`.
ggplot(dupes_full, aes(x = n)) +
  geom_histogram(color = "white", fill = "steelblue", binwidth = 1) +
  labs(
    y = "Number of Parcels",
    x = "Number of Duplicates (n)",
    title = "Distribution of Duplicate Counts"
  ) +
  scale_x_continuous(breaks = seq(2, max(dupes_full$n), by = 1)) +
  theme_minimal()