% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fastx_synchronize.R
\name{fastx_synchronize}
\alias{fastx_synchronize}
\alias{fastq_synchronize}
\alias{fasta_synchronize}
\title{Synchronize FASTA and FASTQ files or objects}
\usage{
fastx_synchronize(
  file1,
  file2 = NULL,
  file_format = "fastq",
  file1_out = NULL,
  file2_out = NULL
)
}
\arguments{
\item{file1}{(Required). A FASTA/FASTQ file path, a FASTA/FASTQ tibble, or a
paired-end tibble of class \code{"pe_df"}. See \emph{Details}.}

\item{file2}{(Optional). A FASTA/FASTQ file path or a FASTA/FASTQ tibble. Can
be omitted if \code{file1} is a \code{"pe_df"} object. See \emph{Details}.}

\item{file_format}{(Optional). Format of the input (\code{file1} and
\code{file2}) and the desired output format: \code{"fasta"} or \code{"fastq"}
(default). This determines the format for both outputs.}

\item{file1_out}{(Optional). Name of the output file for synchronized reads
from \code{file1}. The file is in either FASTA or FASTQ format, depending on
\code{file_format}. If \code{NULL} (default), no sequences are written to a
file. See \emph{Details}.}

\item{file2_out}{(Optional). Name of the output file for synchronized reads
from \code{file2}. The file is in either FASTA or FASTQ format, depending on
\code{file_format}. If \code{NULL} (default), no sequences are written to a
file. See \emph{Details}.}
}
\value{
A tibble or \code{NULL}.

If both \code{file1_out} and \code{file2_out} are \code{NULL}, a tibble
containing the synchronized reads from \code{file1} is returned. The
synchronized reads from \code{file2} are accessible via the \code{"reverse"}
attribute of the returned tibble.

If both \code{file1_out} and \code{file2_out} are specified, the synchronized
sequences are written to the specified output files, and no tibble is
returned.
}
\description{
\code{fastx_synchronize} synchronizes sequences between two
FASTA/FASTQ files or objects by retaining only the common sequences present
in both.
}
\details{
\code{file1} and \code{file2} can either be paths to FASTA/FASTQ files or
tibble objects containing the sequences.
FASTA objects are tibbles that contain the columns \code{Header} and
\code{Sequence}, see \code{\link[microseq]{readFasta}}. FASTQ objects are
tibbles that contain the columns \code{Header}, \code{Sequence}, and
\code{Quality}, see \code{\link[microseq]{readFastq}}.

If \code{file1} is an object of class \code{"pe_df"}, the second read tibble
is automatically extracted from its \code{"reverse"} attribute unless
explicitly provided via the \code{file2} argument. This allows streamlined
input handling for paired-end tibbles created by
\code{\link{vs_fastx_trim_filt}}.

Sequence IDs in the \code{Header} fields must be identical for each read pair
in both \code{file1} and \code{file2} for synchronization to work correctly.

If \code{file1_out} and \code{file2_out} are specified, the synchronized
sequences are written to these files in the format specified by
\code{file_format}.

If \code{file1_out} and \code{file2_out} are \code{NULL}, the function
returns a FASTA/FASTQ object containing synchronized reads from \code{file1}.
The synchronized reads from \code{file2} are included as an attribute named
\code{"reverse"} in the returned tibble.

The returned tibble is assigned the S3 class \code{"pe_df"}, indicating that
it represents paired-end sequence data. Downstream functions can use this
class tag to distinguish paired-end tibbles from other tibbles.

Both \code{file1_out} and \code{file2_out} must either be \code{NULL} or both
must be character strings specifying the file paths.
}
\examples{
# Define arguments: forward (R1) and reverse (R2) reads
file1 <- system.file("extdata/small_R1.fq", package = "Rsearch")
file2 <- system.file("extdata/small_R2.fq", package = "Rsearch")
file_format <- "fastq"
file1_out <- NULL
file2_out <- NULL

# Synchronize files and return as a tibble
# (both output arguments are NULL, so nothing is written to file)
sync_seqs <- fastx_synchronize(file1 = file1,
                               file2 = file2,
                               file_format = file_format,
                               file1_out = file1_out,
                               file2_out = file2_out)

# Extract tibbles with synchronized sequences;
# the reads from file2 are stored in the "reverse" attribute
R1_sync <- sync_seqs
R2_sync <- attr(sync_seqs, "reverse")

# Synchronize files and write to output files

# Define output file names (temporary files)
out1 <- tempfile(fileext = ".fastq")
out2 <- tempfile(fileext = ".fastq")

fastx_synchronize(file1 = file1,
                  file2 = file2,
                  file_format = file_format,
                  file1_out = out1,
                  file2_out = out2)

\dontshow{unlink(c(out1, out2))}

}
