% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/segment_tree_crowns.R
\name{segment_tree_crowns}
\alias{segment_tree_crowns}
\alias{segment_tree_crowns,data.frame-method}
\alias{segment_tree_crowns,LAS-method}
\alias{segment_tree_crowns,LAScatalog-method}
\title{Segment Tree Crowns in a 3D Point Cloud}
\usage{
segment_tree_crowns(
  point_cloud,
  crown_diameter_to_tree_height,
  crown_length_to_tree_height,
  crown_diameter_constant = 0,
  crown_length_constant = 0,
  segment_crowns_only_above = 0,
  ground_height = NULL,
  crown_id_column_name = "crown_id",
  centroid_convergence_distance = 0.01,
  max_iterations_per_point = 500,
  dbscan_neighborhood_radius = 0.3,
  min_num_points_per_crown = 5,
  ...
)

\S4method{segment_tree_crowns}{data.frame}(
  point_cloud,
  crown_diameter_to_tree_height,
  crown_length_to_tree_height,
  crown_diameter_constant,
  crown_length_constant,
  segment_crowns_only_above,
  ground_height,
  crown_id_column_name,
  centroid_convergence_distance,
  max_iterations_per_point,
  dbscan_neighborhood_radius,
  min_num_points_per_crown,
  verbose = TRUE,
  also_return_terminal_centroids = FALSE,
  also_return_all_centroids = FALSE
)

\S4method{segment_tree_crowns}{LAS}(
  point_cloud,
  crown_diameter_to_tree_height,
  crown_length_to_tree_height,
  crown_diameter_constant,
  crown_length_constant,
  segment_crowns_only_above,
  ground_height,
  crown_id_column_name,
  centroid_convergence_distance,
  max_iterations_per_point,
  dbscan_neighborhood_radius,
  min_num_points_per_crown,
  verbose = TRUE,
  also_return_terminal_centroids = FALSE,
  also_return_all_centroids = FALSE,
  write_crown_id_also_to_file = FALSE,
  crown_id_file_description = crown_id_column_name
)

\S4method{segment_tree_crowns}{LAScatalog}(
  point_cloud,
  crown_diameter_to_tree_height,
  crown_length_to_tree_height,
  crown_diameter_constant,
  crown_length_constant,
  segment_crowns_only_above,
  ground_height,
  crown_id_column_name,
  centroid_convergence_distance,
  max_iterations_per_point,
  dbscan_neighborhood_radius,
  min_num_points_per_crown,
  write_crown_id_also_to_file = TRUE,
  crown_id_file_description = crown_id_column_name
)
}
\arguments{
\item{point_cloud}{A data set containing xyz-coordinates. Can be passed as
either a \link[=data.frame]{data.frame}, a
\link[data.table:data.table]{data.table}, a \link[lidR:LAS-class]{LAS object} or
a \link[lidR:LAScatalog-class]{LAScatalog}.

If it's a \link[=data.frame]{data.frame} or a
\link[data.table:data.table]{data.table} the function searches for coordinate
columns by looking for the first numeric columns named "x"/"X", "y"/"Y", or
"z"/"Z". For each instance where it can't find one of those it selects the
next available numeric column in the table and issues a warning.}

\item{crown_diameter_to_tree_height}{Single number or
\link[terra:SpatRaster-class]{SpatRasters} covering the area of the \code{point_cloud}.
The diameter of the search kernel will be calculated by multiplying this
value and the height above ground of the kernel center, and adding the
crown_diameter_constant. For details see "How the algorithm works". Points
will not be segmented wherever a raster contains \code{NA} values.}

\item{crown_length_to_tree_height}{Single number or
\link[terra:SpatRaster-class]{SpatRasters} covering the area of the \code{point_cloud}.
The height of the search kernel will be calculated by multiplying this
value and the height above ground of the kernel center, and adding the
crown_length_constant. For details see "How the algorithm works". Points
will not be segmented wherever a raster contains \code{NA} values.}

\item{crown_diameter_constant, crown_length_constant}{Single number >=0.
Used to determine the dimensions of the search kernel, together with the
respective ratios to tree height. For details see "How the algorithm
works".}

\item{segment_crowns_only_above}{A single positive number denoting the
minimum height above ground at which crown IDs will be calculated.

Note that points directly below this threshold will still be considered
during the segmentation if they are within reach of search kernels
constructed at the \code{segment_crowns_only_above} height. See "How the
algorithm works" to learn about the search kernels.}

\item{ground_height}{One of
\itemize{
\item \code{NULL}, indicating that \code{point_cloud} is normalized with ground height at
zero.
\item A \link[terra:SpatRaster-class]{SpatRaster} providing ground heights for the area
of the (not normalized) \code{point_cloud}.
\item A list of (ideally named) arguments to the
\link[lidR:rasterize]{lidR rasterize_terrain()} function, which
will be used to generate a ground height grid from \code{point_cloud}.
Currently not supported with point clouds stored in
\link[=data.frame]{data.frames}. The list should not contain an argument to
the "las" parameter of \link[lidR:rasterize]{rasterize_terrain()}.
}

Points will not be segmented wherever ground heights are NA.}

\item{crown_id_column_name}{A character string. The column or attribute name
under which IDs for segmented bodies should be stored.}

\item{centroid_convergence_distance}{A single number. Distance at which it is
assumed that subsequently calculated centroids have converged to the
nearest mode. See "How the algorithm works" to learn about centroids and
modes in the context of the AMS3D algorithm.}

\item{max_iterations_per_point}{A single integer. Maximum number of
centroids calculated before the search for the nearest mode stops. See
"How the algorithm works" to learn about centroids and modes in the context
of the AMS3D algorithm.}

\item{dbscan_neighborhood_radius}{A single number. Radius for the spherical
DBSCAN neighborhood around a mode. See "How the algorithm works" to learn
about neighborhoods in the context of the DBSCAN algorithm.}

\item{min_num_points_per_crown}{A single integer. The minimum number of
converged centroids within a DBSCAN neighborhood at which the centroid in
the neighborhood's center will be treated as a core point. See "How the
algorithm works" to learn about neighborhoods and core points in the
context of the DBSCANb algorithm.}

\item{...}{Unused.}

\item{verbose}{\code{TRUE} or \code{FALSE}. Should the function show a progress bar and
other runtime information in the console?}

\item{also_return_terminal_centroids}{\code{TRUE} or \code{FALSE}. Should mode coordinates be
returned as well?}

\item{also_return_all_centroids}{\code{TRUE} or \code{FALSE}. Should all centroid coordinates
be returned as well? This slows down processing by a little bit and will
return a data set which requires at least ~10 times more memory than the
input point cloud.}

\item{write_crown_id_also_to_file}{\code{TRUE} or \code{FALSE}. When writing
the returned LAS object to disk, should the IDs of segmented bodies be
written into that file as well? See the
\link[lidR:add_attribute]{lidR function add_lasattribute()} for additional
details. Will also be used for all attributes of the
\link[lidR:LAS-class]{LAS object(s)} which are returned if \code{also_return_terminal_centroids}
and/or \code{also_return_all_centroids} were set to \code{TRUE}.

For \link[lidR:LAScatalog-class]{LAScatalogs}, this is only used if the
result is returned as a \link[lidR:LAS-class]{LAS object} in memory. If the
LAScatalog is set up to write the segmented point clouds into files, the
IDs of segmented bodies will always be written to these files as well.}

\item{crown_id_file_description}{A character string. If
\code{write_crown_id_also_to_file} is set to \code{TRUE} this will be used
as an additional description of the IDs of segmented bodies when the LAS
object is written to disk. See the "desc" parameter of the
\link[lidR:add_attribute]{lidR function add_lasattribute()} for additional
details.}
}
\value{
The point cloud which was passed to the function but extended with a
column/attribute holding for each point the ID of a segmented body. IDs
with the value \code{NA} indicate that a point was not assigned to any
body.

If \code{also_return_terminal_centroids} and/or \code{also_return_all_centroids} were set to \code{TRUE}, a
list with at most three named elements in the following order:
\describe{
\item{segmented_point_cloud}{
The segmented point cloud which would have been returned directly if
\code{also_return_terminal_centroids} and \code{also_return_all_centroids} had been set
to \code{FALSE}.
}
\item{terminal_centroids}{
If \code{also_return_terminal_centroids} was set to \code{TRUE}, a point cloud of
the same type as the input point cloud holding the terminal centroids calculated
with the AMS3D algorithm and two additional columns/attributes. One of
these columns/attributes holds IDs of the segmented bodies that the
modes belong to and the other (named "point_index") holds indices to
the points in the input point cloud.
}
\item{centroids}{
If \code{also_return_all_centroids} was set to \code{TRUE}, a point cloud
of the same type as the input point cloud holding the centroids
calculated with the AMS3D algorithm and two additional
columns/attributes. One of these columns/attributes holds IDs of the
segmented bodies that the centroids belong to and the other (named
"point_index") holds indices to the points in the input point cloud.
}
}

The method for \link[lidR:LAScatalog-class]{LASCatalogs} works just like
any other lidR function that accepts them, i.e. it returns either an
in-memory \link[lidR:LAS-class]{LAS object} or writes the processed chunks to
individual files and returns those file's names. Please refer to the
\link[lidR:LAScatalog-class]{LASCatalog} documentation for more details.
}
\description{
Employs a variant of the mean shift algorithm (Ferraz et. al, 2016) and after
that the DBSCAN algorithm in order to identify tree crowns in airborne lidar
data.
}
\section{Functions}{
\itemize{
\item \code{segment_tree_crowns(data.frame)}: Segments coordinates stored as three columns
in a \link[=data.frame]{data.frame} or \link[data.table:data.table]{data.table}.

\item \code{segment_tree_crowns(LAS)}: Segments the point cloud data of a
\link[lidR:LAS-class]{LAS object}.

\item \code{segment_tree_crowns(LAScatalog)}: Segments the point cloud data of a
\link[lidR:LAScatalog-class]{LAScatalog}. This method does not support
additionally returning centroids. Instead of the verbose
parameter use the LAScatalog's progress option (see the
\link[lidR:LAScatalog-class]{LAScatalog documentation} -> "Processing options"
-> "progress").

}}
\section{How the algorithm works}{


The basic assumption is that tree crowns form local maxima of point density
and height within lidar point clouds. These local maxima are called
\emph{modes}. The algorithm tries to find the nearest mode for each point. This
is done by looking at the surrounding points and moving into the direction
of the highest point density until the nearest mode is (almost) reached.

The surrounding points are found with a search kernel (a three-dimensional
search window) which has the shape of a vertical cylinder. According to
literature, the algorithm works best if the search kernel has roughly the
size of the surrounding crowns. Therefore, the parameters controlling the
kernels dimension are simplistically called
\code{crown_diameter_to_tree_height}, \code{crown_diameter_constant}, and
\code{crown_lenght...} respectively. The diameter of the kernel is calculated
from the  height above ground of the kernels center times the value for
crown_diameter_to_tree_height, plus the crown_diameter constant. The
height of the kernel is calculated respectively.

The direction of the highest point density is found by calculating the
average position of all points within the cylinder, the cylinder's so
called \emph{centroid}. In order to move further into the direction of the
highest point density, a new cylinder is placed on the centroid and a new
centroid is calculated for that cylinder. This continues on until the
cylinders "stop moving", i.e. until two subsequently calculated centroids
are closer to each other than \code{centroid_convergence_distance}. At this
point, the most recently calculated centroid, hence called 'terminal
centroid', is assumed to be close enough to the mode, so that the
original point can be linked to the respective tree top.

It sometimes happens that centroids converge only after a lot of
iterations. In order to prevent situations where an excessive number of
centroids is calculated for just one point, the parameter
\code{max_iterations_per_point} is used to stop the centroid
calculations after a certain number of them has been performed.
Nonetheless, the last centroid found before stopping is still taken as
a good enough guess of the nearest mode's position.

After the terminal centroids of the individual points have been
calculated, it can be seen that terminal centroids of points belonging to
the same tree crown are positioned very close to each other, shortly below
the crown's apex. These dense clusters of terminal centroids are
identified with the DBSCAN algorithm which assigns a cluster ID to every
one of them. The cluster IDs are then finally connected back to the points
of the point cloud and used as crown IDs.

The DBSCAN clustering is explained nicely in
\href{https://en.wikipedia.org/wiki/DBSCAN#Preliminary}{Wikipedia} but here
is a quick sketch of what it does: The DBSCAN algorithm classifies points
as either core points, border points, or noise and assigns core and border
points to the same cluster if they are close enough to at least one other
core point of the cluster.

In order to be core points, points need to have enough neighbors. The
parameter \code{dbscan_neighborhood_radius} determines the radius of the
neighborhood and the parameter \code{min_num_points_per_crown}
determines the minimum number of points in the neighborhood (including the
to-be-classified one), which are needed for a core point.

Border points are within the neighborhood of core points but don't have
enough neighbors to be core points themselves. Noise points are not within
the neighborhood of any core point and also don't have enough neighbors to
be core points.

Clusters are identified by iterating over the points and classifying them
one by one. For each point the neighborhood is scanned and the point is
classified accordingly. If the point is a core or border point, the
neighboring points are classified next. As long as it is possible to
directly connect to new core or border points in this way, the same cluster
ID is assigned to each encountered point.
}

\references{
Ferraz, A., S. Saatchi, C. Mallet, and V. Meyer (2016)
\emph{Lidar detection of individual tree size in tropical forests}. Remote
Sensing of Environment 183:318–333. \url{doi:10.1016/j.rse.2016.05.028}

Ferraz, A., F. Bretar, S. Jaquemond, G. Gonçalves, L. Pereira,
M. Tomé, and P. Soares (2012)
\emph{3-D mapping of a multi-layered Mediteranean forest using ALS data}.
Remote Sensing of Environment, 121:210-223.
\doi{10.1016/j.rse.2012.01.020}
}
