Appendix A: Proof of Lemma 3.1
Note that for all \(g \in E^*,x \in E,\zeta >0\),
$$\begin{aligned} \langle g,x \rangle + \frac{\zeta }{2} \Vert x\Vert ^2 \ge -\frac{1}{\zeta } \Vert g\Vert _*^2. \end{aligned}$$
(45)
Let us first prove that the statement is true for \(k=0\). Since \(\alpha _{0} = A_0\), we have
$$\begin{aligned} \begin{array}{lcl} \varPsi _0^*&{} \mathop {=}\limits ^{(18),(12)} &{}\beta _0 d(y_0) + \alpha _0 \left[ F_0+ \langle G_0, y_0-x_0 \rangle + h(y_0) \right] \nonumber \\ &{} \mathop {\ge }\limits ^{(4)} &{}\frac{\beta _0}{2}\Vert y_0-x_0\Vert ^2 + \alpha _0 \left[ F_0+ \langle G_0, y_0-x_0 \rangle + h(y_0) \right] \nonumber \\ &{} \mathop {\ge }\limits ^{(7)} &{}\alpha _0 \left[ F_0+ \langle G_0, y_0-x_0 \rangle + h(y_0) + \frac{\beta _0}{2}\Vert y_0-x_0\Vert ^2\right] \nonumber \\ &{} = &{}\alpha _0 \left[ f_0+ \langle g_0, y_0-x_0 \rangle + h(y_0) + \frac{L}{2}\Vert y_0-x_0\Vert ^2\right] \nonumber \\ &{} &{}+\, \alpha _0 \left[ F_0-f_0+ \langle G_0-g_0, y_0-x_0 \rangle + \frac{\beta _0-L}{2}\Vert y_0-x_0\Vert ^2\right] \nonumber \\ &{} \mathop {\ge }\limits ^{(2),(45)} &{} \alpha _0 \left[ f(y_0)+ h(y_0) - \delta \right] + \alpha _0 \left[ F_0-f_0 \right] - \frac{\alpha _0}{\beta _0-L}\Vert G_0-g_0\Vert ^2_*. \nonumber \end{array} \end{aligned}$$
In view of \(\alpha _{0} = A_0 = B_0\), this proves (19) for \(k=0\).
Let us assume that (19) holds for some \(k \ge 0\) and prove that it also holds for \(k +1\). Let \(gh(z_k) \in \partial h(z_k)\). From the optimality condition in (13), we have
$$\begin{aligned} \left\langle \beta _k \nabla d(z_k) + \sum _{i=0}^k{\alpha _i G_i} + A_k gh(z_k), x-z_k \right\rangle \ge 0 , \quad \forall x \in Q \nonumber \end{aligned}$$
and, hence,
$$\begin{aligned} \beta _k \langle \nabla d(z_k) , x-z_k \rangle \ge \sum _{i=0}^k{\alpha _i} \langle G_i, z_k - x \rangle + A_k \langle gh(z_k), z_k - x \rangle , \quad \forall x \in Q. \nonumber \\ \end{aligned}$$
(46)
At the same time, due to the convexity of h(x)
$$\begin{aligned}&A_{k+1}h(x)+A_k\langle gh(z_k) , z_k -x \rangle \mathop {=}\limits ^{(10)} A_{k}h(x)+A_k\langle gh(z_k) , z_k -x \rangle + \alpha _{k+1} h(x) \nonumber \\&\quad \ge A_k h(z_k) + A_k\langle gh(z_k) , x -z_k \rangle + A_k\langle gh(z_k) , z_k -x \rangle + \alpha _{k+1} h(x)\nonumber \\&\quad \mathop {=}\limits ^{(10)} A_k h(z_k)+ \alpha _{k+1} h(x) \end{aligned}$$
(47)
Now we get for all \(x \in Q\)
$$\begin{aligned} \begin{array}{lcl} \varPsi _{k+1}(x)&{} \mathop {=}\limits ^{(18)} &{}\beta _{k+1} d(x) + \sum _{i=0}^{k+1}{\alpha _i \left[ F_i+ \langle G_i, x-x_i \rangle \right] } + A_{k+1} h(x) \\ &{} \mathop {\ge }\limits ^{(7),(5)} &{} \beta _{k} V(x,z_k) + \beta _{k}d(z_k) +\beta _{k} \langle \nabla d(z_k), x - z_k \rangle + \\ &{} &{}+ \sum _{i=0}^{k+1}{\alpha _i \left[ F_i+ \langle G_i, x-x_i \rangle \right] } + A_{k+1} h(x) \\ &{} \mathop {\ge }\limits ^{(46)} &{} \beta _{k} V(x,z_k) + \beta _{k}d(z_k) + \sum _{i=0}^k{\alpha _i} \langle G_i, z_k - x \rangle + A_k \langle gh(z_k), z_k - x \rangle \\ &{} &{}+ \sum _{i=0}^{k+1}{\alpha _i \left[ F_i+ \langle G_i, x-x_i \rangle \right] } + A_{k+1} h(x) \\ &{} = &{} \beta _{k} V(x,z_k) + \beta _{k}d(z_k) + A_{k+1} h(x) + A_k\langle gh(z_k), z_k-x \rangle \\ &{}&{} + \sum _{i=0}^{k}{\alpha _i \left[ F_i+ \langle G_i, z_k-x_i \rangle \right] } + \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle \right] \\ &{} \mathop {\ge }\limits ^{(47)} &{} \beta _{k} V(x,z_k) +\beta _{k}d(z_k) + \sum _{i=0}^{k}{\alpha _i \left[ F_i+ \langle G_i, z_k-x_i \rangle \right] } + A_kh(z_k) + \\ &{}&{} + \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle + h(x) \right] \\ &{}\mathop {=}\limits ^{(18),(13)} &{} \beta _{k} V(x,z_k) +\varPsi _{k}^*+ \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle + h(x) \right] . \end{array} \end{aligned}$$
(48)
Also, since \(A_k \mathop {=}\limits ^{(10)} (A_{k+1}-B_{k+1}) + (B_{k+1}-\alpha _{k+1})\), we have
$$\begin{aligned}&\qquad \varPsi _{k}^*+ \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle + h(x) \right] \nonumber \\&\mathop {\ge }\limits ^{(19)} A_k \varphi (y_k) - E_k + \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle + h(x) \right] \nonumber \\&\mathop {=}\limits ^{(1)} (A_{k+1}-B_{k+1})f(y_k) + (B_{k+1}-\alpha _{k+1}) f(y_k) +A_k h(y_k) - E_k \nonumber \\&\qquad +\, \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle + h(x) \right] \nonumber \\&\mathop {\ge }\limits ^{(2)} (A_{k+1}-B_{k+1})f(y_k) + (B_{k+1}-\alpha _{k+1}) (f_{k+1} + \langle g_{k+1} , y_k -x_{k+1} \rangle ) + \nonumber \\&\qquad + A_k h(y_k) - E_k + \alpha _{k+1} \left[ F_{k+1}+ \langle G_{k+1}, x-x_{k+1} \rangle \right] +\, \alpha _{k+1} h(x) \nonumber \\&= (A_{k+1}-B_{k+1})f(y_k) + (B_{k+1}-\alpha _{k+1}) f_{k+1} + \alpha _{k+1}F_{k+1} \nonumber \\&\qquad + (B_{k+1}-\alpha _{k+1})\langle g_{k+1} , y_k -x_{k+1} \rangle + \alpha _{k+1} \langle G_{k+1}, x-x_{k+1} \rangle \nonumber \\&\qquad + A_k h(y_k)- E_k + \alpha _{k+1} h(x) \nonumber \\&= (A_{k+1}-B_{k+1})f(y_k) + (B_{k+1}-\alpha _{k+1}) (f_{k+1} - F_{k+1}) + B_{k+1}F_{k+1} \\&\qquad + (B_{k+1}-\alpha _{k+1}) \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \nonumber \\&\qquad +\langle G_{k+1}, (B_{k+1}-\alpha _{k+1}) (y_k - x_{k+1}) + \alpha _{k+1}(x-x_{k+1}) \rangle \nonumber \\&\qquad + A_k h(y_k)- E_k + \alpha _{k+1} h(x) \nonumber \\&\mathop {=}\limits ^{(10)} (A_{k+1}-B_{k+1})f(y_k) + (B_{k+1}-\alpha _{k+1}) (f_{k+1} - F_{k+1}) + B_{k+1}F_{k+1} \nonumber \\&\qquad + (B_{k+1}-\alpha _{k+1}) \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \nonumber \\&\qquad +\langle G_{k+1}, (B_{k+1}-\alpha _{k+1}) (y_k - x_{k+1}) + \alpha _{k+1}(x-x_{k+1}) \rangle \nonumber \\&\qquad + (A_{k+1} - B_{k+1} + B_{k+1} - \alpha _{k+1}) h(y_k)- E_k + \alpha _{k+1} h(x) \nonumber \\&\mathop {=}\limits ^{(14)} (A_{k+1}-B_{k+1})( f(y_k) + h(y_k)) \nonumber \\&\qquad + (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + h(y_k) \right] \nonumber \\&\qquad + B_{k+1} F_{k+1} +\alpha _{k+1} \langle G_{k+1}, x-z_k \rangle -E_k + \alpha _{k+1} h(x) .\nonumber \end{aligned}$$
(49)
In the last equality we used, from (14) and (11), it follows that
$$\begin{aligned} (B_{k+1}-\alpha _{k+1}) x_{k+1} + \alpha _{k+1} x_{k+1} = \alpha _{k+1}z_k + (B_{k+1}-\alpha _{k+1}) y_{k}. \end{aligned}$$
Hence,
$$\begin{aligned} (B_{k+1}-\alpha _{k+1}) (y_{k} - x_{k+1}) - \alpha _{k+1} x_{k+1} = -\alpha _{k+1}z_k \end{aligned}$$
and
$$\begin{aligned} (B_{k+1}-\alpha _{k+1}) (y_k - x_{k+1}) + \alpha _{k+1}(x-x_{k+1}) = \alpha _{k+1} ( x - z_k). \end{aligned}$$
Thus, for all \(x \in Q\), we have
$$\begin{aligned}&\varPsi _{k+1}(x) \mathop {\ge }\limits ^{(48),(49),(1)} (A_{k+1}-B_{k+1}) \varphi (y_k) + B_{k+1} F_{k+1} \nonumber \\&+ \beta _{k} V(x,z_k) + \alpha _{k+1} \langle G_{k+1}, x-z_k \rangle + \alpha _{k+1} h(x) \nonumber \\&+ (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + h(y_k) \right] - E_k \end{aligned}$$
(50)
At the same time,
$$\begin{aligned} \frac{\beta _{k}}{B_{k+1}} \mathop {\ge }\limits ^{(9)} \frac{\alpha _{k+1}^2\beta _{k+1}}{B_{k+1}^2} \mathop {=}\limits ^{(11)} \tau _k^2\beta _{k+1}. \end{aligned}$$
(51)
Using (50), we obtain
$$\begin{aligned} \begin{array}{lcl} \varPsi _{k+1}^* &{} \ge &{} B_{k+1} F_{k+1} + \min _{x\in Q} \left\{ \beta _{k} V(x,z_k) + \alpha _{k+1} \langle G_{k+1}, x-z_k \rangle +\right. \\ &{}&{} + \left. \alpha _{k+1} h(x) \right\} + (A_{k+1}-B_{k+1}) \varphi (y_k) + \\ &{}&{} (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + h(y_k) \right] \\ &{}\mathop {=}\limits ^{(15)}&{} B_{k+1} F_{k+1} + \beta _{k} V(\hat{x}_{k+1},z_k) + \alpha _{k+1} \langle G_{k+1}, \hat{x}_{k+1}-z_k \rangle + \\ &{}&{} +\, \alpha _{k+1} h(\hat{x}_{k+1}) - E_k + (A_{k+1}-B_{k+1}) \varphi (y_k) + \\ &{}&{} (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + h(y_k) \right] \\ &{}\mathop {\ge }\limits ^{(6)}&{} B_{k+1} \bigl [ F_{k+1} + \tau _k \langle G_{k+1}, \hat{x}_{k+1}-z_k \rangle + \frac{\beta _k}{B_{k+1}} \Vert \hat{x}_{k+1}-z_k\Vert ^2 \bigr ] - \\ &{}&{} -E_k + (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \right] \\ &{}&{} + B_{k+1} (\tau _k h(\hat{x}_{k+1}) + (1-\tau _k) h(y_k)) + (A_{k+1}-B_{k+1}) \varphi (y_k) \\ &{}\mathop {\ge }\limits ^{(51),(16)}&{} B_{k+1} \bigl [ F_{k+1} + \tau _k \langle G_{k+1}, \hat{x}_{k+1}-z_k \rangle + \frac{\tau _k^2\beta _{k+1}}{2} \Vert \hat{x}_{k+1}-z_k\Vert ^2 \bigr ] - E_k \\ &{}&{} + (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \right] \\ &{}&{} + B_{k+1}h(w_{k+1}) + (A_{k+1}-B_{k+1}) \varphi (y_k) \\ &{}\mathop {\ge }\limits ^{(14),(16)}&{} B_{k+1} \bigl [ F_{k+1} + \langle G_{k+1}, w_{k+1}-x_{k+1} \rangle + \frac{\beta _{k+1}}{2} \Vert w_{k+1}-x_{k+1}\Vert ^2 \\ &{}&{} +\, h(w_{k+1})\bigr ] - E_k + (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} \right. \\ &{}&{} + \left. \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \right] + (A_{k+1}-B_{k+1}) \varphi (y_k) \\ &{}=&{} B_{k+1} \bigl [ f_{k+1} + \langle g_{k+1}, w_{k+1}-x_{k+1} \rangle + \frac{L}{2} \Vert w_{k+1}-x_{k+1}\Vert ^2\\ &{}&{} +\, h(w_{k+1}) \bigr ] - E_k + B_{k+1} \bigl [ F_{k+1} - f_{k+1}\\ &{}&{} + \langle G_{k+1} - g_{k+1} , w_{k+1}-x_{k+1} \rangle + \frac{\beta _{k+1}-L}{2} \Vert w_{k+1}-x_{k+1}\Vert ^2 \bigr ] \\ &{}&{} + (B_{k+1}-\alpha _{k+1}) \left[ f_{k+1} - F_{k+1} + \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \right] \\ &{}&{} + (A_{k+1}-B_{k+1}) \varphi (y_k) \\ &{}\mathop {\ge }\limits ^{(2)}&{} B_{k+1} (f (w_{k+1}) + h(w_{k+1}) - \delta ) - E_k + \alpha _{k+1} ( F_{k+1} - f_{k+1} ) \\ &{}&{} + (B_{k+1}-\alpha _{k+1}) \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + \\ &{}&{} + B_{k+1} \bigl [ \langle G_{k+1} - g_{k+1} , w_{k+1}-x_{k+1} \rangle + \frac{\beta _{k+1}-L}{2} \Vert w_{k+1}-x_{k+1}\Vert ^2 \bigr ]\\ &{}&{} + (A_{k+1}-B_{k+1}) \varphi (y_k) \\ &{} = &{} A_{k+1} \left( \frac{B_{k+1}}{A_{k+1}} \varphi (w_{k+1}) + \frac{A_{k+1}-B_{k+1}}{A_{k+1}} \varphi (y_k) \right) -B_{k+1}\delta - E_k \\ &{}&{} + \alpha _{k+1} ( F_{k+1} - f_{k+1} ) + (B_{k+1}-\alpha _{k+1}) \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle + \\ &{}&{} + B_{k+1} \bigl [ \langle G_{k+1} - g_{k+1} , w_{k+1}-x_{k+1} \rangle + \frac{\beta _{k+1}-L}{2} \Vert w_{k+1}-x_{k+1}\Vert ^2 \bigr ]\\ &{}\mathop {\ge }\limits ^{(17),(45)}&{} A_{k+1} \varphi (y_{k+1}) -E_k - B_{k+1} \delta + \alpha _{k+1} ( F_{k+1} - f_{k+1} ) \\ &{}&{}+ (B_{k+1}-\alpha _{k+1}) \langle g_{k+1} - G_{k+1}, y_k - x_{k+1} \rangle \\ &{}&{} - \frac{B_{k+1}}{\beta _{k+1}-L} \Vert g_{k+1} - G_{k+1}\Vert _*^2. \end{array} \end{aligned}$$
(52)
Finally, from (14), we have
$$\begin{aligned} y_k - x_{k+1} \mathop {=}\limits ^{(14)} \tau _k (y_k-z_k) \mathop {=}\limits ^{(11)} \frac{\alpha _{k+1}}{B_{k+1}}(y_k-z_k). \end{aligned}$$
Thus, by (52), we obtain (19) for \(k+1\). \(\square \)
Appendix B: Proof of Theorem 4.1
Obviously, (35) follows from (34) and (29). Let us prove the inequality
$$\begin{aligned} {\mathbb E}\varphi (u_{k}) - \varphi ^* \le \frac{\mu R_0^2}{2} \mathrm{e}^{-k} + \frac{C_3 \mathrm{e}2^{p-1} }{e-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-k}\right) \delta \end{aligned}$$
(53)
for all \(k \ge 1\). Then we will have (34) as a consequence. Let us prove (53) for \(k=1\). It follows from Lemma 4.1 that
$$\begin{aligned} {\mathbb E}\varphi (y_{N_0}) - \varphi ^* \le \frac{C_1 LR_0^2 V^2}{N_0^{p}} + \frac{C_2 \sigma R_0 V }{\sqrt{m_0N_0}} + C_3 N_0^{p-1}\delta . \end{aligned}$$
(54)
From (31), we have
$$\begin{aligned}&\frac{C_1 LR_0^2 V^2}{N_0^{p}} \le \frac{C_1 LR_0^2 V^2}{\frac{4\mathrm{e}C_1LV^2}{\mu }} \le \frac{\mu R_0^2}{4\mathrm{e}}, \nonumber \\&C_3 N_0^{p-1} \delta \le \frac{C_3 \mathrm{e}2^{p-1}}{\mathrm{e}-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \left( 1-\mathrm{e}^{-1}\right) \delta . \end{aligned}$$
(55)
Using (32), we obtain
$$\begin{aligned} \frac{C_2 \sigma R_0 V }{\sqrt{m_0N_0}} \le \frac{C_2 \sigma R_0 V }{\sqrt{\frac{16\mathrm{e}^{2} C_2^2 \sigma ^2 V^2}{\mu ^2 R_0^2 N_0 } N_0}} \le \frac{\mu R_0^2}{4\mathrm{e}}. \end{aligned}$$
This with (54) and (55) proves (53) for \(k=1\).
Let us now assume that (53) holds for \(k=j\) and prove that it holds for \(k=j+1\). It follows from (29) and(53) for \(k=j\) that
$$\begin{aligned}&{\mathbb E}\Vert u_j-x^*\Vert ^2\le \frac{2}{\mu } \left( {\mathbb E}\varphi (u_j) - \varphi ^* \right) \nonumber \\&\le \frac{2}{\mu } \left( \frac{\mu R_0^2}{2} \mathrm{e}^{-j} + \frac{C_3 \mathrm{e}2^{p-1} }{\mathrm{e}-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-j}\right) \delta \right) \mathop {=}\limits ^{(33)} R_j^2.\nonumber \end{aligned}$$
After \(N_{j}\) iterations of Algorithm 1 with starting point \(u_{j}\) (or \(y_{N_{j-1}}\), which is the same), applying Lemma 4.1, we have
$$\begin{aligned} {\mathbb E}\varphi (y_{N_{j}}) - \varphi ^* \le \frac{C_1 LR_j^2 V^2}{N_{j}^{p}} + \frac{C_2 \sigma R_j V }{\sqrt{m_{j}N_{j}}} + C_3 N_{j}^{p-1}\delta . \end{aligned}$$
(56)
From (31), we have
$$\begin{aligned} \frac{C_1 LR_j^2 V^2}{N_{j}^{p}} \le \frac{C_1 LR_{j}^2 V^2}{\frac{4\mathrm{e}C_1LV^2}{\mu }} \le \frac{\mu R_j^2}{4\mathrm{e}}, \quad C_3 N_{j}^{p-1} \delta \le C_3 2^{p-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta . \end{aligned}$$
(57)
By (32),
$$\begin{aligned}&m_{j} \ge \frac{16\mathrm{e}^{j+2} C_2^2 \sigma ^2 V^2}{\mu ^2 R_0^2 N_{j} } \ge \frac{16\mathrm{e}^{2} C_2^2 \sigma ^2 V^2}{\mu ^2 N_{j} \left( R_0^2 \mathrm{e}^{-j} + \frac{2^p \mathrm{e}C_3 \delta }{\mu (\mathrm{e}-1)} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \left( 1-\mathrm{e}^{-j} \right) \delta \right) } \nonumber \\&\mathop {=}\limits ^{(33)} \frac{16\mathrm{e}^{2} C_2^2 \sigma ^2 V^2}{\mu ^2 R_{j}^2 N_{j} }, \nonumber \end{aligned}$$
and, hence,
$$\begin{aligned} \frac{C_2 \sigma R_j V }{\sqrt{m_jN_j}} \le \frac{C_2 \sigma R_j V }{\sqrt{\frac{16\mathrm{e}^{2} C_2^2 \sigma ^2 V^2}{\mu ^2 R_j^2 N_j } N_j}} \le \frac{\mu R_j^2}{4\mathrm{e}}. \end{aligned}$$
(58)
Finally, we arrive at
$$\begin{aligned}&{\mathbb E}\varphi (u_{j+1}) - \varphi ^* = {\mathbb E}\varphi (y_{N_{j}}) - \varphi ^* \mathop {\le }\limits ^{(56),(57),(58)} \frac{\mu R_j^2}{2\mathrm{e}} + C_3 2^{p-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \mathop {=}\limits ^{(33)} \nonumber \\&\quad = \frac{1}{\mathrm{e}} \left( \frac{\mu R_0^2}{2} \mathrm{e}^{-j} + \frac{C_3 \mathrm{e}2^{p-1} }{\mathrm{e}-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-j}\right) \delta \right) \nonumber \\&\qquad + C_3 2^{p-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \nonumber \\&\quad = \frac{\mu R_0^2}{2} \mathrm{e}^{-(j+1)} + \frac{C_3 \mathrm{e}2^{p-1} }{\mathrm{e}-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-(j+1)}\right) \delta . \nonumber \end{aligned}$$
Thus, we have obtained that (53) holds for \(k=j+1\) and, by induction, it holds for all \(k \ge 1\). If we choose \(\delta \) satisfying (36) and perform \(N=\left\lceil \ln \left( \frac{\mu R_0^2}{ \varepsilon }\right) \right\rceil \) outer iterations of Algorithm 2, we will obtain from (34)
$$\begin{aligned} {\mathbb E}\varphi (u_{N}) - \varphi ^* \le \frac{\mu R_0^2}{2} \mathrm{e}^{-N} + \frac{C_3 \mathrm{e}2^{p-1}}{e-1} \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \le \frac{\varepsilon }{2}+\frac{\varepsilon }{2} = \varepsilon . \end{aligned}$$
It remains to calculate the number of oracle calls to obtain an \(\varepsilon \)-solution in the sense that \({\mathbb E}\varphi (u_{N}) - \varphi ^* \le \varepsilon \). We perform N outer iterations (k runs from 0 to \(N-1\)), on each outer iteration k, we perform \(N_k\) inner iterations, and, on each inner iteration, we call the oracle \(m_k\) times. Hence, the total number of oracle calls is
$$\begin{aligned}&{\mathcal C} (\varepsilon ) = \sum _{k=0}^{N-1}{N_k m_k} \le \sum _{k=0}^{N-1}{N_k \left( 1 + \frac{16 C_2^2 \mathrm{e}^2 \sigma ^2 V^2 e^k}{\mu ^2 R_0^2 N_k } \right) } \le N N_0+{\frac{16 C_2^2 \mathrm{e}^2 \sigma ^2 V^2 \mathrm{e}^{N}}{\mu ^2 R_0^2 (\mathrm{e}-1)}} \nonumber \\&\quad \le \left( 1+ \ln \left( \frac{\mu R_0^2}{ \varepsilon }\right) \right) \left( 1+ \left( \frac{4\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{1}{p}} \right) + \frac{16\mathrm{e}^3C_2^2\sigma ^2V^2}{\mu \varepsilon (\mathrm{e}-1)} \nonumber \\&\quad \le \left( 1+ \ln \left( \frac{\mu R_0^2}{ \varepsilon }\right) \right) \left( 1+ \left( \frac{62LV^2}{\mu }\right) ^{\frac{1}{p}} \right) + \frac{96000\sigma ^2V^2}{\mu \varepsilon } . \nonumber \end{aligned}$$
\(\square \)
Appendix C: Proof of Theorem 4.2
Let \(A_k,k\ge 0\) be the event \(A_k := \left\{ \varphi (u_k) - \varphi ^* \le \frac{\mu R_k^2}{ 2 } \right\} \) and \(\bar{A}_k\) be its complement. Let us first prove that, for \(k \ge 1\),
$$\begin{aligned} {\mathbb P}\left\{ \left. \varphi (u_{k}) - \varphi ^* > \frac{\mu R_k^2}{ 2 } \right| A_{k-1} \right\} \le \frac{\Lambda }{N}. \end{aligned}$$
(59)
Since the event \(A_{k-1}\) holds, we have from (29)
$$\begin{aligned} \left\| u_{k-1} - x^*\right\| ^2 \le \frac{2}{\mu } \left( \varphi (u_{k-1}) - \varphi ^* \right) \le R_{k-1}^2. \end{aligned}$$
Hence, the solution of the problem \(\min _{x \in Q_{k-1}} \varphi (x)\) is the same as the solution of the initial Problem (1). Let us denote \(D_{k-1} := \max _{x,y\in Q_{k-1}} \Vert x-y\Vert \). Clearly, \(D_{k-1} \le 2R_{k-1}\). Note that \( D_{k-1} = R_{k-1} \max _{x,y\in Q_{k-1}}\frac{\Vert x~-~y\Vert }{R_{k-1}}\) and the diameter of the set \(Q_{k-1}\) with respect to the norm \(\frac{\Vert \cdot \Vert }{R_{k-1}}\) is not greater than 2. We apply Theorems 3.2 and 3.4 with \(LR_{k-1}^2\) in the role of \(L,\frac{\sigma R_{k-1} }{\sqrt{m_{k-1}}}\) in the role of \(\sigma ,V\) in the role of R, 2 in the role of D, use (37) and make the same argument as in the proof of Lemma 4.1. This leads to the following inequality
$$\begin{aligned}&{\mathbb P}\left\{ \varphi (u_k) - \varphi ^* > \frac{C_1 LR_{k-1}^2V^2 }{N_{k-1}^{p}} + \frac{C_2( 1+ \Omega )\sigma R_{k-1} V }{\sqrt{m_{k-1}N_{k-1}}} + C_3 N_{k-1}^{p-1} \delta + \right. \nonumber \\&\left. \left. \frac{2C_4 R_{k-1} \sigma \sqrt{\Omega } }{\sqrt{m_{k-1}N_{k-1}}} \right| A_{k-1} \right\} \le \frac{\Lambda }{N} , \end{aligned}$$
(60)
where \(C_1=4 \sqrt{2},C_2 = 16 \sqrt{2},C_3 = 48,C_4 = 4 \sqrt{3},\Omega = \ln \left( \frac{3N}{\Lambda } \right) \). Using (38), we have
$$\begin{aligned} \frac{C_1 LR_{k-1}^2 V^2}{N_{k-1}^{p}} \le \frac{C_1 LR_{k-1}^2 V^2}{\frac{6\mathrm{e}C_1LV^2}{\mu }} \le \frac{\mu R_{k-1}^2}{6\mathrm{e}}, \; C_3 N_{k-1}^{p-1} \delta \le C_3 2^{p-1} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta . \end{aligned}$$
(61)
From (39), we have
$$\begin{aligned}&m_{k-1} \ge \frac{36\mathrm{e}^{k+1} C_2^2 \sigma ^2 V^2 (1+\Omega )^2}{\mu ^2 R_0^2 N_{k-1} } \ge \nonumber \\&\frac{36\mathrm{e}^{2} C_2^2 \sigma ^2 V^2 (1+\Omega )^2 }{\mu ^2 N_{k-1} \left( R_0^2 \mathrm{e}^{-(k-1)} + \frac{2^p \mathrm{e}C_3 \delta }{\mu (\mathrm{e}-1)} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \left( 1-\mathrm{e}^{-(k-1)} \right) \delta \right) } \mathop {=}\limits ^{(40)} \nonumber \\&= \frac{36\mathrm{e}^{2} C_2^2 \sigma ^2 V^2 (1+\Omega )^2}{\mu ^2 R_{k-1}^2 N_{k-1} }, \nonumber \end{aligned}$$
and, hence,
$$\begin{aligned} \frac{C_2 \sigma R_{k-1} V (1+\Omega )}{\sqrt{m_{k-1}N_{k-1}}} \le \frac{C_2 \sigma R_{k-1} V (1+\Omega )}{\sqrt{\frac{36\mathrm{e}^{2} C_2^2 \sigma ^2 V^2 (1+\Omega )^2}{\mu ^2 R_{k-1}^2 N_{k-1} } N_{k-1}}} \le \frac{\mu R_{k-1}^2}{6\mathrm{e}}. \end{aligned}$$
(62)
Also, by (39), we have
$$\begin{aligned}&m_{k-1} \ge \frac{144\mathrm{e}^{k+1} C_4^2 \sigma ^2 \Omega }{\mu ^2 R_0^2 N_{k-1} } \ge \nonumber \\&\frac{144\mathrm{e}^{2} C_4^2 \sigma ^2 \Omega }{\mu ^2 N_{k-1} \left( R_0^2 \mathrm{e}^{-(k-1)} + \frac{2^p \mathrm{e}C_3 \delta }{\mu (\mathrm{e}-1)} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \left( 1-\mathrm{e}^{-(k-1)} \right) \delta \right) } \mathop {=}\limits ^{(40)} \frac{144\mathrm{e}^{2} C_4^2 \sigma ^2 \Omega }{\mu ^2 R_{k-1}^2 N_{k-1} }, \nonumber \end{aligned}$$
and, hence,
$$\begin{aligned} \frac{2C_4 R_{k-1} \sigma \sqrt{\Omega } }{\sqrt{m_{k-1}N_{k-1}}} \le \frac{2C_4 R_{k-1} \sigma \sqrt{\Omega } }{\sqrt{\frac{144\mathrm{e}^{2} C_4^2 \sigma ^2 \Omega }{\mu ^2 R_{k-1}^2 N_{k-1} } N_{k-1}}} \le \frac{\mu R_{k-1}^2}{6\mathrm{e}}. \end{aligned}$$
(63)
Finally, we arrive at
$$\begin{aligned}&\frac{C_1 LR_{k-1}^2V^2 }{N_{k-1}^{p}} + \frac{C_2( 1+ \Omega )\sigma R_{k-1} V }{\sqrt{m_{k-1}N_{k-1}}} + C_3 N_{k-1}^{p-1} \delta + \frac{2C_4 R_{k-1} \sigma \sqrt{\Omega } }{\sqrt{m_{k-1}N_{k-1}}} \mathop {\le }\limits ^{(61),(62),(63)} \nonumber \\&\quad \le \frac{\mu R_{k-1}^2}{2\mathrm{e}} + C_3 2^{p-1} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \nonumber \\&\quad \mathop {=}\limits ^{(40)} \frac{1}{\mathrm{e}} \left( \frac{\mu R_0^2}{2} \mathrm{e}^{-(k-1)} + \frac{C_3 \mathrm{e}2^{p-1} }{\mathrm{e}-1} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-(k-1)}\right) \delta \right) \nonumber \\&\qquad + C_3 2^{p-1} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \nonumber \\&\quad = \frac{\mu R_0^2}{2} \mathrm{e}^{-k} + \frac{C_3 \mathrm{e}2^{p-1} }{\mathrm{e}-1} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}}\left( 1-\mathrm{e}^{-k}\right) \delta = \frac{\mu R_k^2}{2}. \nonumber \end{aligned}$$
Thus, (59) follows from (60).
Also, for all \(k = 1, \dots , N\), we have
$$\begin{aligned}&{\mathbb P}\left\{ \varphi (u_{k}) - \varphi ^*> \frac{\mu R_k^2}{ 2 } \right\} = {\mathbb P}\left\{ \left. \varphi (u_{k}) - \varphi ^*> \frac{\mu R_k^2}{ 2 }\right| A_{k-1} \cup \bar{A}_{k-1} \right\} \nonumber \\&= {\mathbb P}\left\{ \left. \varphi (u_{k}) - \varphi ^*> \frac{\mu R_k^2}{ 2 } \right| A_{k-1} \right\} {\mathbb P}\{ A_{k-1} \} \nonumber \\&+ {\mathbb P}\left\{ \left. \varphi (u_{k}) - \varphi ^*> \frac{\mu R_k^2}{ 2 } \right| \bar{A}_{k-1} \right\} {\mathbb P}\{\bar{A}_{k-1} \} \mathop {\le }\limits ^{(59)} \nonumber \\&\le \frac{\Lambda }{N} + {\mathbb P}\{\bar{A}_{k-1} \} = \frac{\Lambda }{N} + {\mathbb P}\left\{ \varphi (u_{k-1}) - \varphi ^* > \frac{\mu R_{k-1}^2}{ 2 }\right\} .\nonumber \end{aligned}$$
Using that \({\mathbb P}\{A_0\}=1\) and summing up these inequalities, we obtain
$$\begin{aligned}&{\mathbb P}\left\{ \varphi (u_{N}) - \varphi ^*> \frac{\mu R_0^2}{ 2 } \mathrm{e}^{-N} + \frac{2^{p-1} \mathrm{e}C_3 \delta }{(\mathrm{e}-1)} \left( \frac{6\mathrm{e}C_1LV^2}{\mu }\right) ^{\frac{p-1}{p}} \delta \right\} \le \nonumber \\&{\mathbb P}\left\{ \varphi (u_{N}) - \varphi ^* > \frac{\mu R_N^2}{ 2 } \right\} \le \Lambda . \nonumber \end{aligned}$$
Making the same arguments as in the proof of Theorem 4.1, we obtain the complexity bound (44). \(\square \)