\documentclass[ALCO,Unicode,published]{cedram}
\usepackage{float}
\usepackage{hyperref}
\usepackage{caption}
\usepackage{microtype}
\usepackage{tikz}
\usetikzlibrary{shapes}
\makeatletter
\providecommand{\tabularnewline}{\\}
\setlength{\abovecaptionskip}{0pt}
\setlength{\belowcaptionskip}{5pt}
\newcommand{\leqnomode}{\tagsleft@true\let\veqno\@@leqno}
\newcommand{\reqnomode}{\tagsleft@false\let\veqno\@@eqno}
\reqnomode
\title[A lifting of the G.--J.\ cluster method to the M.--R.\ algebra]{A lifting of the Goulden--Jackson cluster method to the Malvenuto--Reutenauer algebra}
\author[\initial{Y.} Zhuang]{\firstname{Yan} \lastname{Zhuang}}
\address{Davidson College\\
405 N. Main St.\\
Davidson, NC 28035 (USA)}
\thanks{The author
was partially supported by an AMS-Simons travel grant.}
\email{yazhuang@davidson.edu}
\keywords{permutation statistics, consecutive patterns, Goulden--Jackson cluster method, Malvenuto--Reutenauer algebra, shuffle-compatibility}
\subjclass{05A05, 05A15, 05E05}
\makeatother
%\datereceived{2021-08-23}
%\daterevised{2022-06-29}
%\dateaccepted{2022-07-05}
\datepublished{2022-12-19}
\begin{document}
\theoremstyle{remark}
\newtheorem{claim}[cdrthm]{Claim}
\equalenv{lem}{lemm}
\global\long\def\iDes{\operatorname{iDes}}%
\global\long\def\des{\operatorname{des}}%
\global\long\def\maj{\operatorname{maj}}%
\global\long\def\Pk{\operatorname{Pk}}%
\global\long\def\pk{\operatorname{pk}}%
\global\long\def\lpk{\operatorname{lpk}}%
\global\long\def\std{\operatorname{std}}%
\global\long\def\inv{\operatorname{inv}}%
\global\long\def\Des{\operatorname{Des}}%
\global\long\def\Comp{\operatorname{Comp}}%
\global\long\def\occ{\operatorname{occ}}%
\global\long\def\mk{\operatorname{mk}}%
\global\long\def\Av{\operatorname{Av}}%
\global\long\def\ides{\operatorname{ides}}%
\global\long\def\imaj{\operatorname{imaj}}%
\global\long\def\icomaj{\operatorname{icomaj}}%
\global\long\def\comaj{\operatorname{comaj}}%
\global\long\def\ipk{\operatorname{ipk}}%
\global\long\def\ilpk{\operatorname{ilpk}}%
\global\long\def\st{\operatorname{st}}%
\global\long\def\ist{\operatorname{ist}}%
\newcommand{\litspace}{\,}
\begin{abstract}
The Goulden\textendash Jackson cluster method is a powerful tool for
counting words by occurrences of prescribed subwords, and was adapted
by Elizalde and Noy for counting permutations by occurrences of prescribed
consecutive patterns. In this paper, we lift the cluster method for
permutations to the Malvenuto\textendash Reutenauer algebra. Upon
applying standard homomorphisms, our result specializes to both the
cluster method for permutations as well as a $q$-analogue which keeps
track of the inversion number statistic. We construct additional homomorphisms
using the theory of shuffle-compatibility, leading to further specializations
which keep track of various ``inverse statistics'', including the
inverse descent number, inverse peak number, and inverse left peak
number. This approach is then used to derive formulas for counting
permutations by occurrences of two families of consecutive patterns\textemdash monotone
patterns and transpositional patterns\textemdash refined by these
statistics.
\end{abstract}
\maketitle
\section{Introduction}
Let $\mathfrak{S}_{n}$ denote the symmetric group of permutations
on the set $[n]\coloneqq\{1,2,\dots,n\}$ (where $\mathfrak{S}_{0}$
consists of the empty permutation), and let $\mathfrak{S}\coloneqq\bigsqcup_{n=0}^{\infty}\mathfrak{S}_{n}$.
We write permutations in one-line notation\textemdash that is, $\pi=\pi_{1}\pi_{2}\cdots\pi_{n}$\textemdash and
the $\pi_{i}$ are called \textit{letters} of $\pi$. The \textit{length}
of $\pi$ is the number of letters in $\pi$, so that $\pi$ has length
$n$ whenever $\pi\in\mathfrak{S}_{n}$.
For a sequence of distinct integers $w$, the \textit{standardization}
of $w$\textemdash denoted $\std(w)$\textemdash is defined to be
the permutation in $\mathfrak{S}$ obtained by replacing the smallest
letter of $w$ with 1, the second smallest with 2, and so on. As an
example, we have $\std(73184)=42153$. Given permutations $\pi\in\mathfrak{S}_{n}$
and $\sigma\in\mathfrak{S}_{m}$, we say that $\pi$ \textit{contains}
$\sigma$ (as a \textit{consecutive pattern}) if $\std(\pi_{i}\pi_{i+1}\cdots\pi_{i+m-1})=\sigma$
for some $i\in[n-m+1]$, and in this case we call $\pi_{i}\pi_{i+1}\cdots\pi_{i+m-1}$
an \textit{occurrence} of $\sigma$ (as a consecutive pattern) in
$\pi$. For instance, the permutation $315497628$ has three occurrences
of the consecutive pattern $213$, namely $315$, $549$, and $628$.
On the other hand, $137258469$ has no occurrences of $213$.
Let $\occ_{\sigma}(\pi)$ denote the number of occurrences of $\sigma$
in $\pi$. If $\occ_{\sigma}(\pi)=0$, then we say that $\pi$ \textit{avoids}
$\sigma$ (as a consecutive pattern). If $\Gamma\subseteq\mathfrak{S}$,
then we let $\mathfrak{S}_{n}(\Gamma)$ denote the subset of permutations
in $\mathfrak{S}_{n}$ avoiding every permutation in $\Gamma$ as
a consecutive pattern. When $\Gamma$ consists of a single permutation
$\sigma$, we shall simply write $\mathfrak{S}_{n}(\sigma)$ as opposed
to $\mathfrak{S}_{n}(\{\sigma\})$. (We use the same convention for
other notations involving a set $\Gamma$ of permutations when $\Gamma$
is a singleton.) As observed earlier, we have $137258469\in\mathfrak{S}_{9}(213)$.
For the rest of this paper, the notions of occurrence and avoidance
of patterns in permutations always refer to consecutive patterns unless
otherwise stated.
The study of consecutive patterns in permutations, initiated by Elizalde
and Noy \cite{Elizalde2003} in 2003, extends the study of classical
patterns in permutations originating in the work of Simion and Schmidt
\cite{Simion1985}. Consecutive patterns in permutations are analogous
to consecutive subwords in words, where repetition of letters is allowed.
In the latter realm, the cluster method of Goulden and Jackson \cite{Goulden1979}
provides a very general formula expressing the generating function
for words by occurrences of prescribed subwords in terms of a ``cluster
generating function'', which is easier to compute. By setting the
variable keeping track of occurrences to zero, this yields a powerful
approach for counting words avoiding a prescribed set of subwords.
In 2012, Elizalde and Noy \cite{Elizalde2012} adapted the Goulden\textendash Jackson
cluster method to the setting of permutations, which they used to
obtain differential equations satisfied by $\omega_{\sigma}(s,x)=(\sum_{n=0}^{\infty}\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\sigma}(\pi)}x^{n}/n!)^{-1}$
for various families of consecutive patterns $\sigma$, including
``monotone patterns'', ``chain patterns'', and ``non-overlapping patterns''.
Solving these differential equations for $\omega_{\sigma}(s,x)$ then
allows one to count permutations by the number of occurrences of $\sigma$.
Over the past decade, Elizalde and Noy's adaptation of the cluster
method for permutations has become a standard tool in the study of
consecutive patterns; see \cite{Beaton2017,Crane2018,Dwyer2018,Elizalde2013,Elizalde2016,Lee2019}
for a selection of references. One recent development is a $q$-analogue
of the cluster method for permutations which also keeps track of the
inversion number statistic. This $q$-cluster method, due to Elizalde,
was first mentioned in his survey \cite{Elizalde2016} on consecutive
patterns, and was applied to monotone patterns and non-overlapping
patterns by Crane, DeSalvo, and Elizalde \cite{Crane2018} in their
study of the Mallows distribution.
To explain the philosophy which guides our work, let us briefly discuss
a paper by Josuat-Verg\`es, Novelli, and Thibon \cite{Josuat-Verges2012},
in which the authors study alternating permutations (and their analogues
in other Coxeter groups) from the perspective of combinatorial Hopf
algebras. Their starting point is Andr\'e's \cite{Andre1881} famous
exponential generating function $\sec x+\tan x$ for the number of
alternating permutations. The authors note that Andr\'e's formula
has a natural lifting in the Malvenuto\textendash Reutenauer algebra
$\mathbf{FQSym}$, a Hopf algebra whose basis elements correspond
to permutations and whose multiplication encodes ``shifted concatenation''
of permutations. They then recover Andr\'e's formula by applying
a certain homomorphism $\phi$ to its lifting in $\mathbf{FQSym}$,
and in their words:
\begin{quote}
``Such a proof is not only illuminating, it says much more than the
original statement. For example, one can now replace $\phi$ by more
complicated morphisms, and obtain generating functions for various
statistics on alternating permutations.''
\end{quote}
A similar approach to permutation enumeration was taken in a series
of papers by Gessel and the present author \cite{Gessel2019,Gessel2014,Zhuang2016,Zhuang2017},
but instead utilizing homomorphisms on noncommutative symmetric functions.
The main result of this present paper is an analogous lifting of the
Goulden\textendash Jackson cluster method for permutations to the
Malvenuto\textendash Reutenauer algebra. Since the basis elements
of the Malvenuto\textendash Reutenauer algebra correspond to permutations,
our cluster method in $\mathbf{FQSym}$ is in a sense the most general
cluster method possible for permutations. By applying the same homomorphism
$\phi$ used by Josuat-Verg\`es\textendash Novelli\textendash Thibon
to our generalized cluster method, we can recover Elizalde and Noy's
cluster method for permutations, and we can use another homomorphism
to recover Elizalde's $q$-analogue. We also construct other homomorphisms
which lead to new specializations of our cluster method that can be
used to count permutations by occurrences of prescribed patterns while
keeping track of other permutation statistics.
\subsection{Permutation statistics}
The permutation statistics that we shall consider are the ``inverses''
of several classical permutation statistics related to descents and
peaks: the \textit{descent number} $\des$, the \textit{major index}
$\maj$, the \textit{comajor index} $\comaj$, the \textit{peak number}
$\pk$, and the \textit{left peak number} $\lpk$. We define these
statistics below.
\begin{itemize}
\item We call $i\in[n-1]$ a \textit{descent} of $\pi\in\mathfrak{S}_{n}$
if $\pi_{i}>\pi_{i+1}$. Then $\des(\pi)$ is defined to be the number
of descents of $\pi$, and $\maj(\pi)$ the sum of all descents of
$\pi$. In other words, if $\Des(\pi)\coloneqq\{\litspace i\in[n-1]:i\text{ is a descent of }\pi\litspace \}$\textemdash that
is, $\Des(\pi)$ is the \textit{descent set} of $\pi$\textemdash then
\[
\des(\pi)\coloneqq\left|\Des(\pi)\right|\quad\text{and\ensuremath{\quad}}\maj(\pi)\coloneqq\sum_{i\in\Des(\pi)}i.
\]
The comajor index $\comaj$ is a variant of the major index $\maj$,
and is defined by
\begin{equation}
\comaj(\pi)\coloneqq\sum_{i\in\Des(\pi)}(n-i)=n\text{\ensuremath{\des}}(\pi)-\text{\ensuremath{\maj}}(\pi).\label{e-comajdesmaj}
\end{equation}
\item We call $i\in\{2,3,\dots,n-1\}$ a \textit{peak} of $\pi\in\mathfrak{S}_{n}$
if $\pi_{i-1}<\pi_{i}>\pi_{i+1}$. Then $\pk(\pi)$ is defined to
be the number of peaks of $\pi$.
\item We call $i\in[n-1]$ a \textit{left peak} of $\pi\in\mathfrak{S}_{n}$
if $i$ is a peak of $\pi$, or if $i=1$ and $i$
is a descent of $\pi$. Then $\lpk(\pi)$ is defined to be the number
of left peaks of $\pi$.\footnote{Equivalently, $\lpk(\pi)$ is the number of peaks of the permutation
$0\pi$ obtained by prepending 0 to $\pi$.}
\end{itemize}
For example, if $\pi=72163584$, then we have $\Des(\pi)=\{1,2,4,7\}$,
$\des(\pi)=4$, $\maj(\pi)=14$, $\comaj(\pi)=18$, $\pk(\pi)=2$,
and $\lpk(\pi)=3$. We note that, in the language of consecutive patterns,
descents correspond to occurrences of $21$ and peaks correspond to
occurrences of $132$ and $231$.
Given a permutation statistic $\st$, we define its \textit{inverse
statistic} $\ist$ by $\ist(\pi)\coloneqq\st(\pi^{-1})$. Continuing
with the example from above, the inverse of $\pi$ is $\pi^{-1}=32586417$,
so we have $\iDes(\pi)=\{1,4,5,6\}$, $\ides(\pi)=4$, $\imaj(\pi)=16$,
$\icomaj(\pi)=16$, $\ipk(\pi)=1$, and $\ilpk(\pi)=2$. While $\st$
and $\ist$ are obviously equidistributed over $\mathfrak{S}_{n}$,
it is worth studying the joint distribution of $\ist$ and other permutation
statistics over $\mathfrak{S}_{n}$, or the distribution of $\ist$
over restricted sets of permutations (such as pattern avoidance classes).
For instance, Garsia and Gessel \cite{Garsia1979} studied the joint
distribution of $\des$, $\ides$, $\maj$, and $\imaj$ over $\mathfrak{S}_{n}$.
Let $\Gamma$ be a set of consecutive patterns and $\occ_{\Gamma}(\pi)$
the number of occurrences in $\pi$ of patterns in $\Gamma$. In this
paper, we will consider the polynomials {\allowdisplaybreaks
\begin{align*}
A_{\Gamma,n}^{(\ides,\imaj)}(s,t,q) & \coloneqq\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}t^{\ides(\pi)+1}q^{\imaj(\pi)},\\
A_{\Gamma,n}^{(\ides,\icomaj)}(s,t,q) & \coloneqq\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}t^{\ides(\pi)+1}q^{\icomaj(\pi)},\\
A_{\Gamma,n}^{\ides}(s,t) & \coloneqq\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}t^{\ides(\pi)+1},\\
P_{\Gamma,n}^{\ipk}(s,t) & \coloneqq\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}t^{\ipk(\pi)+1},\text{ and}\\
P_{\Gamma,n}^{\ilpk}(s,t) & \coloneqq\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}t^{\ilpk(\pi)}
\end{align*}
}where $n\geq1$, and with each of these polynomials defined to be
1 when $n=0$. These polynomials give the joint distribution of the
occurrence statistic $\occ_{\Gamma}$ along with each of the statistics
$(\ides,\imaj)$, $(\ides,\icomaj)$, $\ides$, $\ipk$, and $\ilpk$.
Setting $s=0$ in any of these polynomials then gives the distribution
of the corresponding statistic over the pattern avoidance class $\mathfrak{S}_{n}(\Gamma)$.
For convenience, let us define $A_{\Gamma,n}^{(\ides,\imaj)}(t,q)\coloneqq A_{\Gamma,n}^{(\ides,\imaj)}(0,t,q)$
and the polynomials $A_{\Gamma,n}^{(\ides,\icomaj)}(t,q)$, $A_{\Gamma,n}^{\ides}(t)$,
$P_{\Gamma,n}^{\ipk}(t)$, and $P_{\Gamma,n}^{\ilpk}(t)$ analogously.
The reason why we consider the statistics $(\ides,\imaj)$, $(\ides,\icomaj)$,
$\ides$, $\ipk$, and $\ilpk$ is because they are inverses of ``shuffle-compatible''
statistics. Roughly speaking, a permutation statistic $\st$ is shuffle-compatible
if the distribution of $\st$ over the set of shuffles of two permutations
$\pi$ and $\sigma$ depends only on $\st(\pi)$, $\st(\sigma)$,
and the lengths of $\pi$ and $\sigma$. (See Section 2.3 for precise
definitions.) If $\st$ is shuffle-compatible and is a coarsening
of the descent set, then $\st$ induces a quotient of the algebra
$\mathrm{QSym}$ of quasisymmetric functions, denoted ${\mathcal A}_{\st}$.
By composing the quotient map from $\mathrm{QSym}$ to ${\mathcal A}_{\st}$
with the canonical surjection from $\mathbf{FQSym}$ to $\mathrm{QSym}$,
we obtain a homomorphism on $\mathbf{FQSym}$ which can be used to
count permutations by the corresponding inverse statistic. Applying
these homomorphisms to our generalized cluster method in $\mathbf{FQSym}$
yields specializations that refine by the statistics $(\ides,\icomaj)$,
$\ides$, $\ipk$, and $\ilpk$.\footnote{We do not explicitly give a specialization for $(\ides,\imaj)$, but
one can be obtained using the one for $(\ides,\icomaj)$ and the formula
$\text{\ensuremath{\imaj}}(\pi)=n\text{\ensuremath{\ides}}(\pi)-\text{\ensuremath{\icomaj}}(\pi)$,
which is equivalent to (\ref{e-comajdesmaj}).}
\subsection{Outline}
The structure of this paper is as follows. Section 2 is devoted to
background material. We first give a brief expository account of the
Goulden\textendash Jackson cluster method, both for words and for
permutations. Then, we define quasisymmetric functions and the Malvenuto\textendash Reutenauer
algebra, and review some basic symmetries on permutations (reversal,
complementation, and reverse-complementation) which will play a role
in our work.
The focus of Section 3 is on our main result, the cluster method in
Malvenuto\textendash Reutenauer. We prove our generalized cluster
method and show how it specializes to Elizalde and Noy's cluster method
for permutations as well as its $q$-analogue. In this section, we
also use the theory of shuffle-compatibility to construct homomorphisms
which we then use to obtain further specializations of our generalized
cluster method for the statistics $(\ides,\icomaj)$, $\ides$, $\ipk$,
and $\ilpk$.
In Sections 4 and 5, we apply our general results from Section 3 to
produce formulas for the polynomials $A_{\sigma,n}^{\ides}(s,t)$,
$P_{\sigma,n}^{\ipk}(s,t)$, and $P_{\sigma,n}^{\ilpk}(s,t)$\textemdash and
their $s=0$ evaluations\textemdash where $\sigma$ is a specific
type of consecutive pattern. Section 4 focuses on \textit{monotone
patterns}, i.e. the patterns $12\cdots m$ and $m\cdots21$. Section
5 focuses on the patterns $12\cdots(a-1)(a+1)a(a+2)(a+3)\cdots m$
where $m\geq5$ and $2\leq a\leq m-2$; these patterns were considered
in \cite{Elizalde2012} as a subfamily of ``chain patterns'', and
here we call them \textit{transpositional patterns} because $12\cdots(a-1)(a+1)a(a+2)(a+3)\cdots m$
is precisely the elementary transposition $(a,a+1)$. Most of our
formulas involve the Hadamard product operation on formal power series,
although some ``Hadamard product-free'' formulas are obtained for
monotone patterns. In the case of monotone patterns, we also give
a formula for counting $12\cdots m$-avoiding permutations by inverse
descent number and inverse major index.
We conclude this paper in Section 6 with a brief discussion of ongoing
work and future directions of research. See \cite{Zhuang2022} for an extended abstract summarizing the results of this paper, as well as \cite{Zhuang2021a} for
proofs of two observations (Claims \ref{cl-ipk} and \ref{cl-ilpk})
which are left unproven here.
\section{Preliminaries}
\subsection{The cluster method for words}
We first introduce the Goulden\textendash Jackson cluster method
for words, which we will use to prove our lifting of the cluster method
for permutations to the Malvenuto\textendash Reutenauer algebra. The
exposition in this section follows that in \cite{Zhuang2018}.
For a finite or countably infinite set $A$, let $A^{*}$ be the set
of all finite sequences of elements of $A$, including the empty sequence.
We call $A$ an \textit{alphabet}, the elements of $A$ \textit{letters},
and the elements of $A^{*}$ \textit{words}. The \textit{length} $\left|w\right|$
of a word $w\in A^{*}$ is the number of letters in $w$. For $v,w\in A^{*}$,
we say that $v$ is a \textit{subword} of $w$ if $w=uvu^{\prime}$
for some $u,u^{\prime}\in A^{*}$, and in this case we also say $w$
\textit{contains} $v$ and that $v$ is an \textit{occurrence} of
$w$. The \textit{total algebra} of $A^{*}$ over $\mathbb{Q},$ denoted
$\mathbb{Q}\langle\langle A^{*}\rangle\rangle$, is the $\mathbb{Q}$-algebra
of formal sums of words in $A^{*}$ where multiplication is the concatenation
product.
Given a word $w=w_{1}w_{2}\cdots w_{n}\in A^{*}$ and a set $B\subseteq A^{*}$,
we say that $(i,v)$ is a \textit{marked occurrence} of $v\in B$
in $w$ if
\[
v=w_{i}w_{i+1}\cdots w_{i+\left|v\right|-1},
\]
that is, $v$ is a subword of $w$ starting at position $i$. Moreover,
we say that $(w,T)$ is a \textit{marked word} on $w$ (with respect
to $B$) if $w\in A^{*}$ and $T$ is a set of some marked occurrences
in $w$ of words in $B$.
To illustrate, suppose that $A=\{a,b,c\}$ and $B=\{cab,bc\}$. Then
\begin{equation}
(cabcabbca,\{(1,cab),(3,bc),(7,bc)\})\label{e-mkwrd}
\end{equation}
is a marked word on $w=cabcabbca$ with respect to $B$. Informally,
we will display a marked word $(w,T)$ as the word $w$ with the marked
occurrences in $T$ circled, so that (\ref{e-mkwrd}) is displayed
as
\begin{center}
\begin{tikzpicture}
\node at (0,0) {$c\;a\;b\;c\;a\;b\;b\;c\;a\;.$};
\draw[red] (-0.88,0) ellipse (12bp and 7.5bp);
\draw[red] (-0.48,0) ellipse (7.5bp and 7.5bp);
\draw[red] (0.54,0) ellipse (7.5bp and 7.5bp);
\end{tikzpicture}
\end{center}
We define the concatenation of two marked words in the obvious way.
For example, (\ref{e-mkwrd}) can be obtained by concatenating $(cabca,\{(1,cab),(3,bc)\})$
and $(bbca,\{(2,bc)\})$, i.e. \begin{center}
\begin{tikzpicture}
\node at (0,0) {$c\;a\;b\;c\;a\quad$ and $\quad b\;b\;c\;a\;.$};
\draw[red] (-1.58,0) ellipse (12bp and 7.5bp);
\draw[red] (-1.18,0) ellipse (7.5bp and 7.5bp);
\draw[red] (1.25,0) ellipse (7.5bp and 7.5bp);
\end{tikzpicture}
\end{center}
A marked word is called a \textit{cluster} if it is not a concatenation
of two nonempty marked words. (In particular, we will call a cluster
with respect to $B$ a $B$-\textit{cluster}.) So, (\ref{e-mkwrd})
is not a cluster, but\begin{center}
\begin{tikzpicture}
\node at (0,0) {$b\;c\;a\;b\;c\;a\;b$};
\draw[red] (-0.67,0) ellipse (7.5bp and 7.5bp);
\draw[red] (-0.27,0) ellipse (11bp and 7.5bp);
\draw[red] (0.13,0) ellipse (7.5bp and 7.5bp);
\draw[red] (0.52,0) ellipse (11bp and 7.5bp);
\end{tikzpicture}
\end{center}
\noindent is a cluster.
For a word $w\in A^{*}$, let $\occ_{B}(w)$ be the number of occurrences
in $w$ of words in $B$ and let $C_{B,w}$ be the set of all $B$-clusters
on $w$. If $c$ is a $B$-cluster, then we let $\mk_{B}(c)$ be the
number of marked occurrences in $c$. Define
\[
F_{B}(s)\coloneqq\sum_{w\in A^{*}}ws^{\occ_{B}(w)}\quad\text{and}\quad R_{B}(s)\coloneqq\sum_{w\in A^{*}}w\sum_{c\in C_{B,w}}s^{\mk_{B}(c)},
\]
so that $F_{B}(s)$ is the generating function for words in $A^{*}$
by the number of occurrences of words in $B$, and $R_{B}(s)$ is
the generating function for $B$-clusters by the number of marked
occurrences. Both $F_{B}(s)$ and $R_{B}(s)$ are elements of the
formal power series algebra $\mathbb{Q}\langle\langle A^{*}\rangle\rangle[[s]]$,
so the variable $s$ commutes with letters in $A$ (but the letters
in $A$ do not commute with each other).
\begin{thm}[Cluster method for words]
\label{t-gjcm} Let $A$ be an alphabet and let $B\subseteq A^{*}$
be a set of words, each of length at least 2. Then
\[
F_{B}(s)=\bigg(1-\sum_{a\in A}a-R_{B}(s-1)\bigg)^{-1}.
\]
\end{thm}
This is a noncommutative version of the original cluster method of
Goulden and Jackson, but the proofs are essentially the same; see,
e.g. \cite[Theorem 1]{Zhuang2018} for details.
\subsection{The cluster method for permutations}
Next, we describe Elizalde and Noy's \cite{Elizalde2012} adaptation
of the cluster method for permutations, as well as its $q$-analogue
which refines by the inversion number. The terms \textit{marked occurrence},
\textit{marked permutation}, \textit{concatenation}, and \textit{cluster}
are defined for permutations in the analogous way as for words, but
with the notion of word containment replaced by permutation containment
(in the sense of consecutive patterns). It is worth pointing out that,
unlike concatenation of marked words, concatenation of marked permutations
is not unique. For instance, both\begin{center}
\begin{tikzpicture}
\node at (0,0) {$3\;2\;1\;4\;6\;7\;8\;5\;9\quad$ and $\quad 7\;5\;1\;8\;3\;4\;6\;2\;9$};
\draw[red] (-2.49,0) ellipse (12bp and 9bp);
\draw[red] (-1.65,0) ellipse (12bp and 9bp);
\draw[red] (-1.11,0) ellipse (12bp and 9bp);
\draw[red] (1.38,0) ellipse (12bp and 9bp);
\draw[red] (2.22,0) ellipse (12bp and 9bp);
\draw[red] (2.76,0) ellipse (12bp and 9bp);
\end{tikzpicture}
\end{center}are concatenations of \begin{center}
\begin{tikzpicture}
\node at (0,0) {$3\;2\;1\;4\quad$ and $\quad 2\;3\;4\;1\;5\;.$};
\draw[red] (-1.34,0) ellipse (12bp and 9bp);
\draw[red] (0.89,0) ellipse (12bp and 9bp);
\draw[red] (1.44,0) ellipse (12bp and 9bp);
\end{tikzpicture}
\end{center}However, this does not make a difference in defining clusters for
permutations or in adapting the cluster method to the setting of permutations.
Let $\Gamma\subseteq\mathfrak{S}$. Recall that $\occ_{\Gamma}(\pi)$
is the number of occurrences in $\pi$ of patterns in $\Gamma$, and
let $C_{\Gamma,\pi}$ be the set of all $\Gamma$-clusters on $\pi$.
If $c$ is a $\Gamma$-cluster, let $\mk_{\Gamma}(c)$ be the number
of marked occurrences in $c$. Define
\begin{align*}
F_{\Gamma}(s,x) & \coloneqq\sum_{n=0}^{\infty}\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}\frac{x^{n}}{n!}\quad\text{and}\\
R_{\Gamma}(s,x) & \coloneqq\sum_{n=0}^{\infty}\sum_{\pi\in\mathfrak{S}_{n}}\sum_{c\in C_{\Gamma,\pi}}s^{\mk_{\Gamma}(c)}\frac{x^{n}}{n!}=\sum_{n=0}^{\infty}\sum_{k=0}^{\infty}r_{\Gamma,n,k}s^{k}\frac{x^{n}}{n!}
\end{align*}
where $r_{\Gamma,n,k}$ is the number of $\Gamma$-clusters of length
$n$ with $k$ marked occurrences.
\begin{thm}[Cluster method for permutations]
\label{t-gjcmperm}Let $\Gamma\subseteq\mathfrak{S}$ be a set of
permutations, each of length at least 2. Then
\[
F_{\Gamma}(s,x)=(1-x-R_{\Gamma}(s-1,x))^{-1}.
\]
\end{thm}
Elizalde and Noy give Theorem \ref{t-gjcmperm} in the special case
where $\Gamma$ consists of a single pattern \cite[Theorem 1.1]{Elizalde2012},
but in Section 3 we will recover this more general result from our
cluster method in the Malvenuto\textendash Reutenauer algebra.
The $n$th $q$-\textit{factorial} $[n]_{q}!$ is defined by
\[
[n]_{q}!\coloneqq(1+q)(1+q+q^{2})\cdots(1+q+\cdots+q^{n-1})
\]
for $n\geq1$ and $[0]_{q}!\coloneqq1$. Later, we will also need
the $q$-\textit{binomial coefficient} defined by
\[
{\binom{n}{k}}_{\!\!q}\coloneqq\frac{[n]_{q}!}{[k]_{q}!\litspace [n-k]_{q}!}
\]
for all $n\geq0$ and $0\leq k\leq n$.
We say that $(i,j)\in[n]^{2}$ is an \textit{inversion} of $\pi\in\mathfrak{S}_{n}$
if $i\pi_{j}$. Let $\inv(\pi)$ denote the number
of inversions of $\pi$. Define
\begin{alignat*}{1}
F_{\Gamma}(s,q,x) & \coloneqq\sum_{n=0}^{\infty}\sum_{\pi\in\mathfrak{S}_{n}}s^{\occ_{\Gamma}(\pi)}q^{\inv(\pi)}\frac{x^{n}}{[n]_{q}!}\quad\text{and}\\
R_{\Gamma}(s,q,x) & \coloneqq\sum_{n=0}^{\infty}\sum_{\pi\in\mathfrak{S}_{n}}q^{\inv(\pi)}\sum_{c\in C_{\Gamma,\pi}}s^{\mk_{\Gamma}(c)}\frac{x^{n}}{[n]_{q}!}=\sum_{n=0}^{\infty}\sum_{k=0}^{\infty}\sum_{j=0}^{\infty}r_{\Gamma,n,k,j}q^{j}s^{k}\frac{x^{n}}{[n]_{q}!}
\end{alignat*}
where $r_{\Gamma,n,k,j}$ is the number of $\Gamma$-clusters of length
$n$ with $k$ marked occurrences and whose underlying permutation
has $j$ inversions. The next result is \cite[Theorem 2.3]{Crane2018},
but for a set $\Gamma$ of patterns rather than a single pattern $\sigma$.
\begin{thm}[$q$-Cluster method for permutations]
\label{t-qgcjmperm}Let $\Gamma\subseteq\mathfrak{S}$ be a set of
permutations, each of length at least 2. Then
\[
F_{\Gamma}(s,q,x)=(1-x-R_{\Gamma}(s-1,q,x))^{-1}.
\]
\end{thm}
See \cite[Corollary 1]{Rawlings2007} for a related result. Like with
Theorem \ref{t-gjcmperm}, we will later recover Theorem \ref{t-qgcjmperm}
as a specialization of our generalized cluster method.
Let us give one more definition before continuing. Given $\sigma\in\mathfrak{S}_{m}$,
let
\[
O_{\sigma}\coloneqq\{\litspace i\in[m-1]:\std(\sigma_{i+1}\sigma_{i+2}\cdots\sigma_{m})=\std(\sigma_{1}\sigma_{2}\cdots\sigma_{m-i})\litspace\}
\]
be the \textit{overlap set} of $\sigma$. The notion of overlap set
is useful for characterizing $\Gamma$-clusters where $\Gamma$ consists
of a single pattern $\sigma$, and we will do this in Sections 4.1
and 5.1.
\subsection{Quasisymmetric functions and shuffle-compatibility}
A permutation in $\mathfrak{S}_{n}$ can be characterized as a word
in $[n]^{*}$ of length $n$ consisting of distinct letters. Let $\mathbb{P}$
be the set of positive integers, and let $\mathfrak{P}_{n}$ denote
the set of words in $\mathbb{P}^{*}$ of length $n$ consisting of
distinct letters\textemdash not necessarily from 1 to $n$. Also,
let $\mathfrak{P}\coloneqq\bigsqcup_{n=0}^{\infty}\mathfrak{P}_{n}$.
In this section only, we will use the term ``permutation'' to refer
more generally to elements of $\mathfrak{P}$. Observe that any statistic
$\st$ defined on permutations in $\mathfrak{S}$ can be extended
to $\mathfrak{P}$ by letting $\st(\pi)\coloneqq\st(\std(\pi))$ for
$\pi\in\mathfrak{P}$.
Every permutation in $\mathfrak{P}$ can be uniquely decomposed into
a sequence of maximal increasing consecutive subsequences, which we
call \textit{increasing runs}. Equivalently, an increasing run of
$\pi$ is a maximal consecutive subsequence containing no descents.
The \textit{descent composition} of $\pi$, denoted $\Comp(\pi)$,
is the composition whose parts are the lengths of the increasing runs
of $\pi$ in the order that they appear. For instance, the increasing
runs of $\pi=85712643$ are $8$, $57$, $126$, $4$, and $3$, so
the descent composition of $\pi$ is $\Comp(\pi)=(1,2,3,1,1)$. We
use the notations $L\vDash n$ and $\left|L\right|=n$ to indicate
that $L$ is a composition of $n$, so that $L\vDash n$ and $\left|L\right|=n$
whenever $L$ is the descent composition of a permutation in $\mathfrak{P}_{n}$.
For a composition $L=(L_{1},L_{2},\dots,L_{k})$, let $\Des(L)\coloneqq\{L_{1},L_{1}+L_{2},\dots,L_{1}+\cdots+L_{k-1}\}$.
It is easy to see that if $L$ is the descent composition of $\pi$,
then $\Des(L)$ is the descent set of $\pi$.
If $\pi\in\mathfrak{P}_{m}$ and $\sigma\in\mathfrak{P}_{n}$ are
\textit{disjoint}\textemdash that is, if they have no letters in common\textemdash then
we call $\tau\in\mathfrak{P}_{m+n}$ a \textit{shuffle} of $\pi$
and $\sigma$ if both $\pi$ and $\sigma$ are subsequences of $\tau$.
The set of shuffles of $\pi$ and $\sigma$ is denoted $S(\pi,\sigma)$.
For example, we have
\[
S(31,25)=\{3125,3215,3251,2315,2351,2531\}.
\]
Let $x_{1},x_{2},\dots$ be commuting variables. A formal power series
$f\in\mathbb{Q}[[x_{1},x_{2},\dots]]$ of bounded degree is called
a \textit{quasisymmetric function} if for any positive integers $a_{1},a_{2},\dots,a_{k}$,
if $i_{1}