Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.37% |
131 / 163 |
|
64.29% |
9 / 14 |
CRAP | |
0.00% |
0 / 1 |
PostContentUtils | |
80.37% |
131 / 163 |
|
64.29% |
9 / 14 |
67.17 | |
0.00% |
0 / 1 |
getContentBody | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
getPostBody | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
3.10 | |||
getPostSummaryWrapperFormat | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getPostSummary | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
getSegments | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
20 | |||
getContentWithoutExcludedBlocks | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
3 | |||
getAudioEnabledBlocks | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
5.09 | |||
getContentParams | |
100.00% |
31 / 31 |
|
100.00% |
1 / 1 |
7 | |||
getMetadata | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getAllTaxonomiesAndTerms | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
getAuthorName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
addMarkerAttribute | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
addMarkerAttributeWithHTMLTagProcessor | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
addMarkerAttributeWithDOMDocument | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
4 |
1 | <?php |
2 | |
3 | declare(strict_types=1); |
4 | |
5 | namespace Beyondwords\Wordpress\Component\Post; |
6 | |
7 | /** |
8 | * BeyondWords Post Content Utilities. |
9 | * |
10 | * @package Beyondwords |
11 | * @subpackage Beyondwords/includes |
12 | * @author Stuart McAlpine <stu@beyondwords.io> |
13 | * @since 3.5.0 |
14 | */ |
15 | class PostContentUtils |
16 | { |
17 | public const DATE_FORMAT = 'Y-m-d\TH:i:s\Z'; |
18 | |
/**
 * Build the content "body" param for the audio, ready to be sent to the
 * BeyondWords API.
 *
 * From API version 1.1 the "summary" param is going to be used differently,
 * so for WordPress the post summary (when there is one) is wrapped in the
 * summary wrapper <div> and prepended to the post body.
 *
 * @since 4.6.0
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @throws \Exception If the post cannot be resolved to a WP_Post.
 *
 * @return string The content body param.
 */
public static function getContentBody($post)
{
    $post = get_post($post);

    if (! $post instanceof \WP_Post) {
        throw new \Exception(esc_html__('Post Not Found', 'speechkit'));
    }

    $summaryHtml = self::getPostSummary($post);
    $bodyHtml    = self::getPostBody($post);

    // Nothing to prepend: the body is sent on its own.
    if (! $summaryHtml) {
        return $bodyHtml;
    }

    $wrapperFormat = self::getPostSummaryWrapperFormat($post);

    return sprintf($wrapperFormat, $summaryHtml) . $bodyHtml;
}
51 | |
/**
 * Get the post body for the audio content.
 *
 * For posts with blocks, the `wpautop` callback is detached from the
 * `the_content` filter while the filters run, and is re-attached at its
 * original priority afterwards so the rest of the request is unaffected.
 *
 * @since 3.0.0
 * @since 3.5.0 Moved from Core\Utils to Component\Post\PostUtils
 * @since 3.8.0 Exclude Gutenberg blocks with attribute { beyondwordsAudio: false }
 * @since 4.0.0 Renamed from PostContentUtils::getSourceTextForAudio() to PostContentUtils::getBody()
 * @since 4.6.0 Renamed from PostContentUtils::getBody() to PostContentUtils::getPostBody()
 * @since 4.7.0 Remove wpautop filter for block editor API requests.
 * @since 5.0.0 Remove SpeechKit-Start shortcode.
 * @since 5.0.0 Remove beyondwords_content filter.
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @throws \Exception If the post cannot be resolved to a WP_Post.
 *
 * @return string The body (the processed $post->post_content).
 */
public static function getPostBody($post)
{
    $post = get_post($post);

    if (! ($post instanceof \WP_Post)) {
        throw new \Exception(esc_html__('Post Not Found', 'speechkit'));
    }

    $content = PostContentUtils::getContentWithoutExcludedBlocks($post);

    // Priority wpautop was registered at, or false when we did not remove it.
    $wpautopPriority = false;

    if (has_blocks($post)) {
        // wpautop breaks our HTML markup when block editor paragraphs are empty,
        // so detach it, remembering the priority so it can be restored below.
        $wpautopPriority = has_filter('the_content', 'wpautop');

        if ($wpautopPriority !== false) {
            remove_filter('the_content', 'wpautop', $wpautopPriority);
        }

        // But we still want to remove empty lines
        $withoutEmptyLines = preg_replace('/^\h*\v+/m', '', $content);

        // preg_replace() returns null on a PCRE error; keep the original content then.
        if (is_string($withoutEmptyLines)) {
            $content = $withoutEmptyLines;
        }
    }

    try {
        // Apply the_content filters to handle shortcodes etc
        $content = apply_filters('the_content', $content);
    } finally {
        // Restore wpautop even if a filter callback throws.
        if ($wpautopPriority !== false) {
            add_filter('the_content', 'wpautop', $wpautopPriority);
        }
    }

    // Trim to remove trailing newlines – common for WordPress content
    return trim($content);
}
92 | |
/**
 * Get the sprintf() format used to wrap the post summary.
 *
 * The format is a <div data-beyondwords-summary="true"> with a single %s
 * placeholder. A data-beyondwords-voice-id attribute is added when the
 * post's `beyondwords_summary_voice_id` meta is a positive integer.
 *
 * @since 4.6.0
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @throws \Exception If the post cannot be resolved to a WP_Post.
 *
 * @return string The summary wrapper <div> format.
 */
public static function getPostSummaryWrapperFormat($post)
{
    $post = get_post($post);

    if (! $post instanceof \WP_Post) {
        throw new \Exception(esc_html__('Post Not Found', 'speechkit'));
    }

    $voiceId = intval(get_post_meta($post->ID, 'beyondwords_summary_voice_id', true));

    // Only emit the voice attribute for a usable (positive) voice ID.
    $voiceAttribute = '';

    if ($voiceId > 0) {
        $voiceAttribute = ' data-beyondwords-voice-id="' . $voiceId . '"';
    }

    return '<div data-beyondwords-summary="true"' . $voiceAttribute . '>%s</div>';
}
121 | |
/**
 * Get the post summary for the audio content.
 *
 * A summary is only produced when the `beyondwords_prepend_excerpt` option
 * is truthy and the post has an excerpt; otherwise null is returned.
 *
 * @since 4.0.0
 * @since 4.6.0 Renamed from PostContentUtils::getSummary() to PostContentUtils::getPostSummary()
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @throws \Exception If the post cannot be resolved to a WP_Post.
 *
 * @return string|null The summary HTML, or null when there is no summary.
 */
public static function getPostSummary($post)
{
    $post = get_post($post);

    if (! $post instanceof \WP_Post) {
        throw new \Exception(esc_html__('Post Not Found', 'speechkit'));
    }

    // The excerpt is only sent if the plugin setting has been checked
    // and the post actually has an excerpt.
    if (! get_option('beyondwords_prepend_excerpt') || ! has_excerpt($post)) {
        return null;
    }

    // Escape characters
    $escaped = htmlentities($post->post_excerpt, ENT_QUOTES | ENT_XHTML);

    // Apply WordPress filters
    $filtered = apply_filters('get_the_excerpt', $escaped);

    // Convert line breaks into paragraphs
    return trim(wpautop($filtered));
}
156 | |
/**
 * Get the segments for the audio content, ready to be sent to the BeyondWords API.
 *
 * @codeCoverageIgnore
 * THIS METHOD IS CURRENTLY NOT IN USE. Segments cannot currently include HTML
 * formatting tags such as <strong> and <em> so we do not pass segments, we pass
 * a HTML string as the body param instead.
 *
 * The result is a title segment, a summary segment and one body segment per
 * audio-enabled block, in that order, with empty-text segments dropped.
 *
 * @since 4.0.0
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @return array|null The segments, or null if the post has no blocks.
 */
public static function getSegments($post)
{
    if (! has_blocks($post)) {
        return null;
    }

    // Title and summary always come first
    $segments = [
        (object) [
            'section' => 'title',
            'text'    => get_the_title($post),
        ],
        (object) [
            'section' => 'summary',
            'text'    => self::getPostSummary($post),
        ],
    ];

    // Followed by one body segment per audio-enabled block
    foreach (self::getAudioEnabledBlocks($post) as $block) {
        $segments[] = (object) [
            'section' => 'body',
            'marker'  => $block['attrs']['beyondwordsMarker'] ?? null,
            'text'    => trim(render_block($block)),
        ];
    }

    // Remove any segments with empty text
    $segmentsWithText = array_filter($segments, static function ($segment) {
        return ! empty($segment->text);
    });

    return array_values($segmentsWithText);
}
213 | |
/**
 * Get the post content without blocks which have been filtered.
 *
 * We have added buttons into the Gutenberg editor to optionally exclude selected
 * blocks from the source text for audio.
 *
 * For posts without blocks the trimmed post_content is returned as-is.
 * Otherwise each audio-enabled block is rendered, given its
 * `beyondwordsMarker` attribute (if any) as a marker, and concatenated.
 *
 * @since 3.8.0
 * @since 4.0.0 Replace for loop with array_reduce
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @throws \Exception If the post cannot be resolved to a WP_Post.
 *
 * @return string The post body without excluded blocks.
 */
public static function getContentWithoutExcludedBlocks($post)
{
    // Resolve IDs to a WP_Post so that $post->post_content is always readable.
    $post = get_post($post);

    if (! ($post instanceof \WP_Post)) {
        throw new \Exception(esc_html__('Post Not Found', 'speechkit'));
    }

    if (! has_blocks($post)) {
        return trim($post->post_content);
    }

    $output = '';

    // getAudioEnabledBlocks() parses the blocks itself, so no separate
    // parse_blocks() call is needed here.
    foreach (PostContentUtils::getAudioEnabledBlocks($post) as $block) {
        $marker = $block['attrs']['beyondwordsMarker'] ?? '';

        $output .= PostContentUtils::addMarkerAttribute(
            render_block($block),
            $marker
        );
    }

    return $output;
}
251 | |
/**
 * Get audio-enabled blocks.
 *
 * A block is audio-enabled unless its `beyondwordsAudio` attribute is set
 * to a falsy value. The blocks keep the array keys they had in the
 * parse_blocks() result.
 *
 * @since 4.0.0
 * @since 5.0.0 Remove beyondwords_post_audio_enabled_blocks filter.
 *
 * @param int|WP_Post $post The WordPress post ID, or post object.
 *
 * @return array The blocks, or an empty array if the post is not found
 *               or has no blocks.
 */
public static function getAudioEnabledBlocks($post)
{
    $post = get_post($post);

    if (! ($post instanceof \WP_Post) || ! has_blocks($post)) {
        return [];
    }

    return array_filter(
        parse_blocks($post->post_content),
        static function ($block) {
            // Blocks are enabled by default; only an explicit attribute can disable them.
            if (is_array($block['attrs']) && isset($block['attrs']['beyondwordsAudio'])) {
                return (bool) $block['attrs']['beyondwordsAudio'];
            }

            return true;
        }
    );
}
288 | |
/**
 * Get the body param we pass to the API.
 *
 * @since 3.0.0 Introduced as getBodyJson.
 * @since 3.3.0 Added metadata to aid custom playlist generation.
 * @since 3.5.0 Moved from Core\Utils to Component\Post\PostUtils.
 * @since 3.10.4 Rename `published_at` API param to `publish_date`.
 * @since 4.0.0 Use new API params.
 * @since 4.0.3 Ensure `image_url` is always a string.
 * @since 4.3.0 Rename from getBodyJson to getContentParams.
 * @since 4.6.0 Remove summary param & prepend body with summary.
 * @since 5.0.0 Remove beyondwords_body_params filter.
 *
 * @static
 * @param int $postId WordPress Post ID.
 *
 * @throws \Exception If the post cannot be found (raised by getContentBody()).
 *
 * @return string|false JSON encoded params, as returned by wp_json_encode().
 **/
public static function getContentParams($postId)
{
    $body = [
        'type'         => 'auto_segment',
        'title'        => get_the_title($postId),
        'body'         => self::getContentBody($postId),
        'source_url'   => get_the_permalink($postId),
        'source_id'    => strval($postId),
        'author'       => self::getAuthorName($postId),
        'image_url'    => strval(wp_get_original_image_url(get_post_thumbnail_id($postId))),
        'metadata'     => self::getMetadata($postId),
        'publish_date' => get_post_time(self::DATE_FORMAT, true, $postId),
    ];

    $status = get_post_status($postId);

    /*
     * If the post status is draft/pending then we explicitly send
     * { published: false } to the BeyondWords API, to prevent the
     * generated audio from being published in playlists.
     *
     * We also omit { publish_date } because get_post_time() returns `false`
     * for posts which are "Pending Review".
     */
    if (in_array($status, ['draft', 'pending'], true)) {
        $body['published'] = false;
        unset($body['publish_date']);
    } elseif (get_option('beyondwords_project_auto_publish_enabled')) {
        $body['published'] = true;
    }

    $languageCode = get_post_meta($postId, 'beyondwords_language_code', true);

    if ($languageCode) {
        $body['language'] = $languageCode;
    }

    // API param => post meta key. Each voice ID is only sent when the stored
    // meta value is a positive integer.
    $voiceParams = [
        'body_voice_id'    => 'beyondwords_body_voice_id',
        'title_voice_id'   => 'beyondwords_title_voice_id',
        'summary_voice_id' => 'beyondwords_summary_voice_id',
    ];

    foreach ($voiceParams as $param => $metaKey) {
        $voiceId = intval(get_post_meta($postId, $metaKey, true));

        if ($voiceId > 0) {
            $body[$param] = $voiceId;
        }
    }

    /**
     * Filters the params we send to the BeyondWords API 'content' endpoint.
     *
     * @since 4.0.0 Introduced as beyondwords_body_params
     * @since 4.3.0 Renamed from beyondwords_body_params to beyondwords_content_params
     *
     * @param array $body   The params we send to the BeyondWords API.
     * @param int   $postId WordPress post ID.
     */
    $body = apply_filters('beyondwords_content_params', $body, $postId);

    return wp_json_encode($body);
}
375 | |
/**
 * Get the post metadata to send with BeyondWords API requests.
 *
 * The metadata key is defined by the BeyondWords API as "A custom object
 * for storing meta information".
 *
 * The metadata values are used to create filters for playlists in the
 * BeyondWords dashboard.
 *
 * Only taxonomies are included: a `taxonomy` property is set when the
 * post has at least one taxonomy with terms, otherwise the object is empty.
 *
 * @since 3.3.0
 * @since 3.5.0 Moved from Core\Utils to Component\Post\PostUtils.
 * @since 5.0.0 Remove beyondwords_post_metadata filter.
 *
 * @param int $postId Post ID.
 *
 * @return \stdClass The metadata object.
 */
public static function getMetadata($postId)
{
    $metadata      = new \stdClass();
    $taxonomyTerms = self::getAllTaxonomiesAndTerms($postId);

    // Leave the taxonomy property off entirely when there is nothing to report.
    if ((array) $taxonomyTerms !== []) {
        $metadata->taxonomy = $taxonomyTerms;
    }

    return $metadata;
}
408 | |
/**
 * Get all taxonomies, and their selected terms, for a post.
 *
 * Returns an object whose property names are taxonomy names and whose
 * values are the names of the post's terms in that taxonomy. Taxonomies
 * with no terms (or whose lookup returns a WP_Error) are omitted.
 *
 * For example:
 *
 * {
 *     "category": ["Category 1"],
 *     "post_tag": ["Tag 1", "Tag 2", "Tag 3"]
 * }
 *
 * @since 3.3.0
 * @since 3.5.0 Moved from Core\Utils to Component\Post\PostUtils
 *
 * @param int $postId Post ID.
 *
 * @return \stdClass Taxonomy names mapped to term names.
 */
public static function getAllTaxonomiesAndTerms($postId)
{
    $taxonomyNames = get_object_taxonomies(get_post_type($postId));

    $result = new \stdClass();

    foreach ($taxonomyNames as $taxonomyName) {
        $terms = get_the_terms($postId, $taxonomyName);

        // Skip taxonomies with no terms, and failed lookups.
        if (empty($terms) || is_wp_error($terms)) {
            continue;
        }

        $result->{(string) $taxonomyName} = wp_list_pluck($terms, 'name');
    }

    return $result;
}
446 | |
/**
 * Get author name for a post.
 *
 * Looks up the post's `post_author` field and returns that user's
 * `display_name` author meta.
 *
 * @since 3.10.4
 *
 * @param int $postId Post ID.
 *
 * @return string
 */
public static function getAuthorName($postId)
{
    return get_the_author_meta(
        'display_name',
        get_post_field('post_author', $postId)
    );
}
462 | |
/**
 * Add data-beyondwords-marker attribute to the root elements in a HTML
 * string (typically the rendered HTML of a single block).
 *
 * Delegates to the WP_HTML_Tag_Processor implementation when that class
 * exists, and to the DOMDocument implementation otherwise. The HTML is
 * returned unchanged when no marker is given.
 *
 * @since 4.2.2
 *
 * @param string $html   HTML.
 * @param string $marker Marker UUID.
 *
 * @return string HTML.
 */
public static function addMarkerAttribute($html, $marker)
{
    if (! $marker) {
        return $html;
    }

    // Fall back to DOMDocument when WP_HTML_Tag_Processor
    // (introduced in WordPress 6.2) is not available.
    if (! class_exists('WP_HTML_Tag_Processor')) {
        return self::addMarkerAttributeWithDOMDocument($html, $marker);
    }

    return self::addMarkerAttributeWithHTMLTagProcessor($html, $marker);
}
490 | |
/**
 * Add data-beyondwords-marker attribute to the root elements in a HTML
 * string using WP_HTML_Tag_Processor.
 *
 * The attribute is set on the first tag the processor finds. The HTML is
 * returned unchanged when no marker is given.
 *
 * @since 4.0.0
 * @since 4.2.2 Moved from src/Component/Post/BlockAttributes/BlockAttributes.php
 *              to src/Component/Post/PostContentUtils.php
 * @since 4.7.0 Prevent empty data-beyondwords-marker attributes.
 *
 * @param string $html   HTML.
 * @param string $marker Marker UUID.
 *
 * @return string HTML.
 */
public static function addMarkerAttributeWithHTMLTagProcessor($html, $marker)
{
    if (! $marker) {
        return $html;
    }

    // https://github.com/WordPress/gutenberg/pull/42485
    $processor = new \WP_HTML_Tag_Processor($html);

    $foundTag = $processor->next_tag();

    if ($foundTag) {
        $processor->set_attribute('data-beyondwords-marker', $marker);
    }

    return strval($processor);
}
520 | |
/**
 * Add data-beyondwords-marker attribute to the root elements in a HTML
 * string using DOMDocument.
 *
 * This is a fallback, since WP_HTML_Tag_Processor was only shipped with
 * WordPress 6.2 on 19 April 2023.
 *
 * https://make.wordpress.org/core/2022/10/13/whats-new-in-gutenberg-14-3-12-october/
 *
 * Note: It is not ideal to do all the $bodyElement/$fullHtml processing
 * in this method, but without it DOMDocument does not work as expected if
 * there is more than 1 root element. The approach here has been taken from
 * some historic Gutenberg code before they implemented WP_HTML_Tag_Processor:
 *
 * https://github.com/WordPress/gutenberg/blob/6671cef1179412a2bbd4969cbbc82705c7f69bac/lib/block-supports/index.php
 *
 * The marker is set on the first element child of the parsed <body>. The
 * original HTML is returned unchanged when no marker is given, when parsing
 * fails, when there is no root element, or when the <body> cannot be
 * located in the serialized output.
 *
 * @since 4.0.0
 * @since 4.2.2 Moved from src/Component/Post/BlockAttributes/BlockAttributes.php
 *              to src/Component/Post/PostContentUtils.php
 * @since 4.7.0 Prevent empty data-beyondwords-marker attributes.
 *
 * @param string $html   HTML.
 * @param string $marker Marker UUID.
 *
 * @return string HTML.
 */
public static function addMarkerAttributeWithDOMDocument($html, $marker)
{
    if (! $marker) {
        return $html;
    }

    $dom = new \DOMDocument('1.0', 'utf-8');

    // The wrapper (with its charset <meta>) is needed to handle UTF-8 properly.
    $wrappedHtml =
        '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
        . $html
        . '</body></html>';

    // Collect libxml parse errors internally instead of emitting PHP warnings
    // (e.g. for HTML5 tags libxml does not recognise), then restore the
    // previous setting so other code is unaffected.
    $previousErrorSetting = libxml_use_internal_errors(true);

    $success = $dom->loadHTML($wrappedHtml, LIBXML_HTML_NODEFDTD | LIBXML_COMPACT);

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorSetting);

    if (! $success) {
        return $html;
    }

    // Structure is like `<html><head/><body/></html>`, so body is the `lastChild` of our document.
    $bodyElement = $dom->documentElement->lastChild;

    $xpath        = new \DOMXPath($dom);
    $rootElements = $xpath->query('./*', $bodyElement);
    $blockRoot    = $rootElements ? $rootElements->item(0) : null;

    if (! ($blockRoot instanceof \DOMElement)) {
        return $html;
    }

    $blockRoot->setAttribute('data-beyondwords-marker', $marker);

    // Avoid using `$dom->saveHTML( $node )` because the node results may not produce consistent
    // whitespace. Saving the root HTML `$dom->saveHTML()` prevents this behavior.
    $fullHtml = $dom->saveHTML();

    // Find the <body> open/close tags. The open tag needs to be adjusted so we get inside the tag
    // and not the tag itself.
    $bodyOpen = strpos($fullHtml, '<body>');

    if ($bodyOpen === false) {
        return $html;
    }

    $start = $bodyOpen + strlen('<body>');
    $end   = strpos($fullHtml, '</body>', $start);

    if ($end === false) {
        return $html;
    }

    return trim(substr($fullHtml, $start, $end - $start));
}
589 | } |