-
Notifications
You must be signed in to change notification settings - Fork 4
/
yoast-sitemaps-cli.php
361 lines (276 loc) · 7.69 KB
/
yoast-sitemaps-cli.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
<?php // phpcs:ignore
/**
* Plugin Name: Yoast Sitemap CLI
* Description: Define Yoast sitemap generation CLI commands
* Author: The Code Co
* Version: 1.0
*
* @package yoast-sitemap-cli
*/
namespace TheCodeCompany;
/**
* Yoast sitemap CLI plugin driver class.
*
* This adds the following WP CLI command to generate and cache the root sitemap.
*
* `wp yoast-sitemap-build-root`
*
* This will generate the root sitemap and store it as a vanilla XML file in the uploads directory. This will be
* automatically served when you hit the root sitemap index defined by Yoast. I.e. https://yoursite.com/sitemap_index.xml.
*
* ## Set Up
*
* Schedule this command with cron so it runs roughly every 10 minutes, for example:
*
* ```
* *\/10 * * * * cd /srv/www/mysite/current && wp yoast-sitemap-build-root --url="https://yoursite.com" 2>&1 2>/dev/null
* ```
*/
class YoastSitemapCLI {

	/** Name of the WP CLI command that builds and caches the root sitemap. */
	const CMD_BUILD_ROOT_SITEMAP = 'yoast-sitemap-build-root';

	/** The max number of posts per individual sitemap generated by Yoast. */
	const SITEMAP_POSTS_PER_PAGE = 1000;

	/** @var \WPSEO_Sitemaps|null Lazily created Yoast sitemap object, see get_yoast_sitemap_instance(). */
	protected $yoast_sitemap = null;

	/**
	 * Boot the plugin by registering the CLI command, the custom routes and the sitemap filters.
	 *
	 * @return void
	 */
	public function boot() {
		$this->register_cli_commands();
		$this->register_routes();
		$this->apply_sitemap_optimisations();
	}

	/**
	 * Register the plugin's WP CLI command/s.
	 *
	 * Does nothing when the `WP_CLI` class is not loaded (i.e. on normal web requests).
	 *
	 * @return void
	 */
	public function register_cli_commands() {
		if ( ! class_exists( 'WP_CLI' ) ) {
			return;
		}

		\WP_CLI::add_command(
			self::CMD_BUILD_ROOT_SITEMAP,
			array( $this, 'cmd_build_root_sitemap' ),
			array(
				'shortdesc' => 'Generate Yoast root sitemap.',
				'longdesc'  => 'Generate Yoast root sitemap and save to cache file on disk.',
			)
		);
	}

	/**
	 * Hook the custom sitemap routing into WordPress request parsing.
	 *
	 * @return void
	 */
	public function register_routes() {
		\add_filter( 'do_parse_request', array( $this, 'handle_routing' ) );
	}

	/**
	 * Handle custom routing.
	 *
	 * We may not have access to the TCC custom routing system so we have to do this manually via
	 * this hook into `do_parse_request`.
	 *
	 * When the request path is exactly `sitemap_index.xml` or `main-sitemap.xsl` the matching
	 * handler sends the response and terminates the request; otherwise the filter value is passed
	 * through untouched.
	 *
	 * @param bool $continue Current value of the `do_parse_request` filter.
	 * @return bool The unmodified `$continue` value.
	 */
	public function handle_routing( $continue ) {
		$request_uri = (string) ( $_SERVER['REQUEST_URI'] ?? '' );

		// Cut the query string off BEFORE trimming slashes. The previous trim-then-strtok order
		// let `/?sitemap_index.xml` resolve to the sitemap (strtok skips leading delimiters) and
		// failed to match `/sitemap_index.xml/?foo=1`.
		$request_path = explode( '?', $request_uri, 2 )[0];
		$request_path = trim( $request_path, '/' );

		// Exact string comparison: a regex with an unescaped `.` also matched e.g. `sitemap_indexAxml`.
		if ( 'sitemap_index.xml' === $request_path ) {
			$this->handle_route_sitemap_root();
		}

		// Sitemap XSL which doesn't work otherwise for some reason.
		if ( 'main-sitemap.xsl' === $request_path ) {
			$this->handle_route_sitemap_xsl();
		}

		return $continue;
	}

	/**
	 * Apply some optimisations to the Yoast sitemap generation process.
	 *
	 * Forces the `wpseo_sitemap_entries_per_page` filter to SITEMAP_POSTS_PER_PAGE.
	 *
	 * @return void
	 */
	public function apply_sitemap_optimisations() {
		\add_filter(
			'wpseo_sitemap_entries_per_page',
			// The incoming value is deliberately ignored; the class constant always wins.
			static function () {
				return self::SITEMAP_POSTS_PER_PAGE;
			}
		);
	}

	/**
	 * WP CLI callback to run the root sitemap generation command.
	 *
	 * Builds the cache file, reports a failure instead of claiming success when the file could
	 * not be written, then pings search engines.
	 *
	 * @return void
	 */
	public function cmd_build_root_sitemap() {
		$cache_file_path = $this->get_sitemap_root_cache_file_path();

		$this->log_info( sprintf( 'Building root sitemap to %s', $cache_file_path ) );

		// Strict `false` check so an overriding method that still returns nothing counts as success.
		if ( false === $this->build_sitemap_root_cache_file() ) {
			$this->log_error( sprintf( 'Failed to save root sitemap to %s', $cache_file_path ) );
			// Explicit return in case the error logger did not stop execution.
			return;
		}

		$this->log_info( sprintf( 'Saved root sitemap to %s', $cache_file_path ) );

		if ( false === $this->ping_search_engines() ) {
			$this->log_info( 'Skipped search engine ping, not supported by the installed Yoast version' );
		} else {
			$this->log_info( 'Pinged search engines with updated sitemap' );
		}

		$this->log_success( 'Done.' );
	}

	/**
	 * Handle the root sitemap custom route.
	 *
	 * Streams the cached sitemap file with XML headers, or answers 404 when no readable cache
	 * file exists. Always terminates the request.
	 *
	 * @return void
	 */
	protected function handle_route_sitemap_root() {
		// Resolve the path once so the check and the read always refer to the same file.
		$sitemap_root_cache_file_path = $this->get_sitemap_root_cache_file_path();

		if ( is_file( $sitemap_root_cache_file_path ) && is_readable( $sitemap_root_cache_file_path ) ) {
			// Set mime type & caching headers for sitemaps file.
			// NOTE This is a modified version of what Yoast does.
			if ( ! headers_sent() ) {
				header( 'HTTP/1.1 200 OK', true, 200 );
				// Prevent the search engines from indexing the XML Sitemap.
				header( 'X-Robots-Tag: noindex, follow', true );
				header( 'X-Sitemap-Served: php', true );
				header( 'Content-Type: text/xml' );
			}

			// Output the cached sitemap file.
			readfile( $sitemap_root_cache_file_path );
		} else {
			// Cache file not present, 404 Not Found.
			if ( ! headers_sent() ) {
				header( 'HTTP/1.1 404 Not Found', true, 404 );
			}
			echo '404 Not Found';
		}

		die;
	}

	/**
	 * Handle the sitemap custom XSL file route.
	 *
	 * This replaces Yoast's built in route which breaks for some reason when we rewrite the root sitemap.
	 * Always terminates the request.
	 *
	 * @return void
	 */
	protected function handle_route_sitemap_xsl() {
		$this->get_yoast_sitemap_instance()->xsl_output( 'main' );
		die;
	}

	/**
	 * Build the root sitemap and save the XML to the cache file on disk.
	 *
	 * An empty render is treated as a failure so that a previously generated, valid cache file
	 * is never replaced by an empty document.
	 *
	 * @return bool True when the cache file was written, false otherwise.
	 */
	protected function build_sitemap_root_cache_file() {
		$sitemap_content = $this->build_sitemap_root();

		if ( ! is_string( $sitemap_content ) || '' === trim( $sitemap_content ) ) {
			return false;
		}

		$bytes_written = \file_put_contents( // phpcs:ignore
			$this->get_sitemap_root_cache_file_path(),
			$sitemap_content
		);

		return false !== $bytes_written;
	}

	/**
	 * Build the root sitemap and return the raw XML output.
	 *
	 * The result is passed through the `yoast_sitemap_cli_root_content` filter.
	 *
	 * @return string
	 */
	protected function build_sitemap_root() {
		$sitemap_content = '';
		$yoast_sitemap   = $this->get_yoast_sitemap_instance();

		// Generate the root sitemap in Yoast and capture the output XML.
		ob_start();
		try {
			$yoast_sitemap->init_sitemaps_providers();
			$yoast_sitemap->build_root_map();
			$yoast_sitemap->output();
			$sitemap_content = (string) ob_get_contents();
		} finally {
			// Always close our buffer, even when Yoast throws, so partial XML is never flushed.
			ob_end_clean();
		}

		return \apply_filters( 'yoast_sitemap_cli_root_content', $sitemap_content );
	}

	/**
	 * Ping search engines to let them know the sitemap has been updated.
	 *
	 * This is done via Yoast, we just call their function. The call is guarded because a missing
	 * class or method would otherwise be a fatal error.
	 * NOTE(review): presumably some Yoast releases no longer ship this helper; confirm against the installed version.
	 *
	 * @return bool True when the Yoast ping helper was called, false when it is unavailable.
	 */
	protected function ping_search_engines() {
		if ( ! is_callable( array( 'WPSEO_Sitemaps', 'ping_search_engines' ) ) ) {
			return false;
		}

		\WPSEO_Sitemaps::ping_search_engines();

		return true;
	}

	/**
	 * Get the path of the root sitemap cache file.
	 *
	 * Defaults to `sitemap_index.xml` inside the uploads base directory (or the current directory
	 * when that is unknown) and can be overridden with the `yoast_sitemap_cli_root_cache_file_path` filter.
	 *
	 * @return string
	 */
	protected function get_sitemap_root_cache_file_path() {
		$upload_dir = wp_upload_dir();

		// Set directory the sitemap will be stored in.
		$cache_dir = isset( $upload_dir['basedir'] ) ? $upload_dir['basedir'] : '.';

		// Generate sitemap file path.
		$cache_file = sprintf( '%s/sitemap_index.xml', $cache_dir );

		return \apply_filters( 'yoast_sitemap_cli_root_cache_file_path', $cache_file );
	}

	/**
	 * Returns the `WPSEO_Sitemaps` instance which should be used by this plugin.
	 *
	 * The instance is created on first use and then reused for the lifetime of this object.
	 *
	 * @return \WPSEO_Sitemaps
	 */
	protected function get_yoast_sitemap_instance() {
		if ( null === $this->yoast_sitemap ) {
			$this->yoast_sitemap = new \WPSEO_Sitemaps();
		}

		return $this->yoast_sitemap;
	}

	/**
	 * Print an info level message to the console.
	 *
	 * @param string $message Message to print.
	 * @return void
	 */
	protected function log_info( string $message ) {
		if ( class_exists( 'WP_CLI' ) ) {
			\WP_CLI::line( $message );
		}
	}

	/**
	 * Print a success level message to the console.
	 *
	 * @param string $message Message to print.
	 * @return void
	 */
	protected function log_success( string $message ) {
		if ( class_exists( 'WP_CLI' ) ) {
			\WP_CLI::success( $message );
		}
	}

	/**
	 * Print an error level message to the console.
	 *
	 * @param string $message Message to print.
	 * @return void
	 */
	protected function log_error( string $message ) {
		if ( class_exists( 'WP_CLI' ) ) {
			\WP_CLI::error( $message );
		}
	}
}
// Boot plugin driver class.
// Runs as soon as this file is loaded: creates the driver and lets boot() register the
// WP CLI command, the `do_parse_request` routing filter and the sitemap page-size filter.
$yoast_sitemap_cli = new YoastSitemapCLI();
$yoast_sitemap_cli->boot();