Skip to content

Commit 98e5d66

Browse files
authored
no ref (#605)
Added a set-podcast tool that lets us bulk-set Facebook descriptions from the first audio card in posts tagged "podcast". Useful for post-Substack migrations.
1 parent fd44143 commit 98e5d66

File tree

4 files changed

+617
-0
lines changed

4 files changed

+617
-0
lines changed

bin/cli.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import getPosts from '../commands/get-posts.js';
3939
import setTemplate from '../commands/set-template.js';
4040
import addMemberCompFromCSVCommands from '../commands/add-member-comp-from-csv.js';
4141
import setFeaturedImages from '../commands/set-featured-images.js';
42+
import setPodcast from '../commands/set-podcast.js';
4243
import cleanSlugs from '../commands/clean-slugs.js';
4344

4445
prettyCLI.command(addMemberCompSubscriptionCommands);
@@ -77,6 +78,7 @@ prettyCLI.command(getPosts);
7778
prettyCLI.command(setTemplate);
7879
prettyCLI.command(addMemberCompFromCSVCommands);
7980
prettyCLI.command(setFeaturedImages);
81+
prettyCLI.command(setPodcast);
8082
prettyCLI.command(cleanSlugs);
8183

8284
prettyCLI.style({

commands/set-podcast.js

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import {ui} from '@tryghost/pretty-cli';
2+
import setPodcast from '../tasks/set-podcast.js';
3+
4+
// Internal identifier for this command, kept in case one is needed.
const id = 'set-podcast';

// Group label this command is listed under.
const group = 'Content:';

// Command name followed by its two required positional parameters.
const flags = 'set-podcast <apiURL> <adminAPIKey>';

// Summary text for the top-level command.
const desc = 'Set Facebook description for podcast posts using the first audio src URL from the post content';

// One description per positional parameter, in the same order as in `flags`.
const paramsDesc = ['URL to your Ghost API', 'Admin API key'];
20+
21+
/**
 * Registers this command's optional flags on the supplied sywac instance.
 *
 * @param {object} sywac - object exposing `boolean` and `number` option-registration methods
 * @returns {void}
 */
const setup = (sywac) => {
    const verboseFlag = {
        defaultValue: false,
        desc: 'Show verbose output'
    };
    sywac.boolean('-V --verbose', verboseFlag);

    const delayFlag = {
        defaultValue: 50,
        desc: 'The delay between API calls, in ms'
    };
    sywac.number('--delayBetweenCalls', delayFlag);
};
32+
33+
/**
 * Entry point executed when the `set-podcast` command is invoked.
 *
 * Builds the task runner from the parsed arguments, runs it against a fresh
 * context object, and reports the outcome through `ui.log`:
 *  - if the runner throws, the collected errors (including the thrown one)
 *    are logged and no success message is printed;
 *  - if the runner completes but tasks recorded errors on the context, those
 *    errors are logged before the summary line;
 *  - the summary line reports `context.processed` and the elapsed time.
 *
 * @param {object} argv - parsed command-line arguments, passed straight to `getTaskRunner`
 * @returns {Promise<void>}
 */
const run = async (argv) => {
    const timer = Date.now();
    const context = {errors: []};

    try {
        // Fetch the tasks, configured correctly according to the options passed in
        const runner = setPodcast.getTaskRunner(argv);

        // Run the migration
        await runner.run(context);
    } catch (error) {
        // A failure that happens before any task records it would otherwise
        // be reported as an empty error list, so make sure it is included.
        if (!Array.isArray(context.errors)) {
            context.errors = [];
        }
        if (!context.errors.includes(error)) {
            context.errors.push(error);
        }

        ui.log.error('Done with errors', context.errors);

        // The run did not complete: do not follow up with a success message.
        return;
    }

    // Tasks may record per-post failures without aborting the run; surface them.
    if (Array.isArray(context.errors) && context.errors.length > 0) {
        ui.log.error('Done with errors', context.errors);
    }

    // Report success
    ui.log.ok(`Successfully processed ${context.processed} posts in ${Date.now() - timer}ms.`);
};
51+
52+
export default {
53+
id,
54+
group,
55+
flags,
56+
desc,
57+
paramsDesc,
58+
setup,
59+
run
60+
};

tasks/set-podcast.js

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
import GhostAdminAPI from '@tryghost/admin-api';
2+
import {makeTaskRunner} from '@tryghost/listr-smart-renderer';
3+
import _ from 'lodash';
4+
import {discover} from '../lib/batch-ghost-discover.js';
5+
6+
/**
 * Builds the task that prepares the shared context for the rest of the run.
 *
 * When executed, the task creates a Ghost Admin API client from
 * `options.apiURL` / `options.adminAPIKey` and stores it, the merged
 * arguments, zeroed counters and an empty error list on `ctx`.
 *
 * @param {object} options - command options; `apiURL` and `adminAPIKey` are read here
 * @returns {{title: string, task: Function}} task definition for the runner
 */
const initialise = (options) => ({
    title: 'Initialising API connection',
    task: (ctx, task) => {
        // Drop one trailing slash from the supplied URL.
        const baseUrl = options.apiURL.replace(/\/$/, '');

        const client = new GhostAdminAPI({
            // The first "localhost" in the URL is swapped for the loopback IP.
            url: baseUrl.replace('localhost', '127.0.0.1'),
            key: options.adminAPIKey,
            version: 'v5.0'
        });

        // Fallback values that are merged with the supplied options.
        const fallbackArgs = {verbose: false, delayBetweenCalls: 50};

        Object.assign(ctx, {
            args: _.mergeWith(fallbackArgs, options),
            api: client,
            processed: 0,
            updated: 0,
            errors: []
        });

        task.output = 'API connection initialised';
    }
});
33+
34+
/**
 * Extracts the first audio URL from HTML content.
 *
 * Patterns are tried in priority order and the first one that matches wins:
 *  1. an `<audio>` tag carrying its own `src` attribute;
 *  2. an `<audio>` tag whose URL lives on a nested `<source src="…">` child;
 *  3. a `<div>` whose class contains "audio", followed by any `src` attribute;
 *  4. an `<iframe>` whose `src` mentions a known audio host
 *     (soundcloud, spotify, anchor, buzzsprout, simplecast, libsyn).
 *
 * Both double- and single-quoted attribute values are recognised.
 *
 * @param {string} html - The HTML content to search in
 * @returns {string|null} The first audio URL found, or null if no audio is found
 *   (also null when `html` is not a non-empty string)
 */
const extractFirstAudio = (html) => {
    // Callers may hand over a missing field; treat that as "no audio".
    if (typeof html !== 'string' || html === '') {
        return null;
    }

    // Each pattern captures the URL in group 1 (double quotes) or group 2 (single quotes).
    const patterns = [
        // <audio src="...">
        /<audio[^>]+src=(?:"([^">]+)"|'([^'>]+)')/i,
        // <audio ...><source src="..."></audio> - the lookahead stops the scan at
        // the closing </audio> so a <source> of a later element is not picked up.
        /<audio\b[^>]*>(?:(?!<\/audio)[\s\S])*?<source[^>]+src=(?:"([^">]+)"|'([^'>]+)')/i,
        // Audio cards (common pattern in Ghost)
        /<div[^>]*class="[^"]*audio[^"]*"[^>]*>[\s\S]*?src=(?:"([^">]+)"|'([^'>]+)')/i,
        // Iframes with audio content (like SoundCloud, Spotify, etc.)
        /<iframe[^>]+src=(?:"([^">]*(?:soundcloud|spotify|anchor|buzzsprout|simplecast|libsyn)[^">]*)"|'([^'>]*(?:soundcloud|spotify|anchor|buzzsprout|simplecast|libsyn)[^'>]*)')/i
    ];

    for (const pattern of patterns) {
        const match = html.match(pattern);
        if (match) {
            return match[1] || match[2];
        }
    }

    return null;
};
63+
64+
/**
 * Extracts the first audio URL from Lexical content.
 *
 * The serialised document is parsed and its node tree is walked depth-first
 * in document order. A node yields a URL when it is:
 *  - an `audio` node with a `src`;
 *  - an `embed` node whose `url` mentions a known audio host;
 *  - an `html` node whose markup contains audio (delegated to `extractFirstAudio`).
 * Malformed nodes are skipped instead of aborting the whole search.
 *
 * @param {string} lexical - The Lexical content (JSON string)
 * @returns {string|null} The first audio URL found, or null if no audio is found
 *   or the content cannot be parsed
 */
const extractFirstAudioFromLexical = (lexical) => {
    // Substrings that mark an embed URL as audio content.
    const audioHosts = ['soundcloud', 'spotify', 'anchor', 'buzzsprout', 'simplecast', 'libsyn'];

    const findAudioInNodes = (nodes) => {
        for (const node of nodes) {
            // Skip null/primitive entries rather than letting them throw.
            if (!node || typeof node !== 'object') {
                continue;
            }

            if (node.type === 'audio' && node.src) {
                return node.src;
            }

            // Check for embed nodes with audio content
            if (node.type === 'embed' && typeof node.url === 'string') {
                if (audioHosts.some(host => node.url.includes(host))) {
                    return node.url;
                }
            }

            // HTML cards can carry raw audio markup, same as in Mobiledoc
            if (node.type === 'html' && typeof node.html === 'string') {
                const htmlAudio = extractFirstAudio(node.html);
                if (htmlAudio) {
                    return htmlAudio;
                }
            }

            // Recursively check children
            if (Array.isArray(node.children)) {
                const childAudio = findAudioInNodes(node.children);
                if (childAudio) {
                    return childAudio;
                }
            }
        }
        return null;
    };

    try {
        const content = JSON.parse(lexical);
        return findAudioInNodes(content.root.children);
    } catch (error) {
        // Unparseable or unexpectedly shaped content is treated as "no audio".
        return null;
    }
};
108+
109+
/**
 * Extracts the first audio URL from Mobiledoc content.
 *
 * The serialised document is parsed and every entry of its `cards` list
 * (each a `[name, payload]` pair) is examined in order. A card yields a URL
 * when it is:
 *  - an `audio` card with a `src`;
 *  - an `embed` card whose `url` mentions a known audio host;
 *  - an `html` card whose markup contains audio (delegated to `extractFirstAudio`).
 * Cards that do not yield a URL are skipped and the search continues, so a
 * non-audio embed or HTML card placed before the audio does not hide it.
 *
 * @param {string} mobiledoc - The Mobiledoc content (JSON string)
 * @returns {string|null} The first audio URL found, or null if no audio is found
 *   or the content cannot be parsed
 */
const extractFirstAudioFromMobiledoc = (mobiledoc) => {
    // Substrings that mark an embed URL as audio content.
    const audioHosts = ['soundcloud', 'spotify', 'anchor', 'buzzsprout', 'simplecast', 'libsyn'];

    try {
        const content = JSON.parse(mobiledoc);
        const cards = Array.isArray(content.cards) ? content.cards : [];

        for (const card of cards) {
            if (!Array.isArray(card)) {
                continue;
            }

            const [cardName, cardData] = card;
            if (!cardData || typeof cardData !== 'object') {
                continue;
            }

            let found = null;

            if (cardName === 'audio') {
                // Direct audio card
                found = cardData.src || null;
            } else if (cardName === 'embed') {
                // Embed card with audio content
                const isAudioEmbed = typeof cardData.url === 'string' &&
                    audioHosts.some(host => cardData.url.includes(host));
                found = isAudioEmbed ? cardData.url : null;
            } else if (cardName === 'html' && cardData.html) {
                // HTML card with audio content
                found = extractFirstAudio(cardData.html);
            }

            if (found) {
                return found;
            }
        }

        return null;
    } catch (error) {
        // Unparseable or unexpectedly shaped content is treated as "no audio".
        return null;
    }
};
156+
157+
/**
 * Finds the first audio URL in a post by trying each stored content format
 * in turn: Lexical first, then Mobiledoc, then rendered HTML.
 *
 * @param {object} post - post object; `lexical`, `mobiledoc` and `html` are read when present
 * @returns {{url: string, source: string}|null} the URL and the format it was
 *   found in ("Lexical", "Mobiledoc" or "HTML"), or null when none has audio
 */
const findFirstAudioInPost = (post) => {
    const candidates = [
        ['Lexical', post.lexical, extractFirstAudioFromLexical],
        ['Mobiledoc', post.mobiledoc, extractFirstAudioFromMobiledoc],
        ['HTML', post.html, extractFirstAudio]
    ];

    for (const [source, content, extract] of candidates) {
        if (content) {
            const url = extract(content);
            if (url) {
                return {url, source};
            }
        }
    }

    return null;
};

/**
 * Builds the ordered list of tasks for the set-podcast tool:
 *  1. initialise the API connection and context;
 *  2. fetch every post matching the `tag:[podcast]` filter into `ctx.posts`;
 *  3. for each post, find its first audio URL and write it to the post's
 *     `og_description` through the Admin API.
 *
 * A failure while fetching aborts the run (the error is recorded and
 * rethrown). A failure on an individual post is recorded in `ctx.errors`
 * and processing continues with the next post.
 *
 * @param {object} options - command options; `verbose` toggles per-post output
 * @returns {Array<object>} task definitions for the runner
 */
const getFullTaskList = (options) => {
    return [
        initialise(options),
        {
            title: 'Fetching posts with podcast tag',
            task: async (ctx, task) => {
                const postDiscoveryOptions = {
                    api: ctx.api,
                    type: 'posts',
                    limit: 100,
                    include: 'tags,authors',
                    filter: 'tag:[podcast]',
                    progress: Boolean(options.verbose)
                };

                try {
                    ctx.posts = await discover(postDiscoveryOptions);
                    task.output = `Found ${ctx.posts.length} posts with podcast tag`;
                } catch (error) {
                    ctx.errors.push(error);
                    throw error;
                }
            }
        },
        {
            title: 'Processing posts and setting Facebook descriptions',
            task: async (ctx, task) => {
                for (const post of ctx.posts) {
                    try {
                        if (options.verbose) {
                            task.output = `Processing post "${post.title}"`;
                        }

                        const audio = findFirstAudioInPost(post);

                        if (audio) {
                            if (options.verbose) {
                                task.output = `Found audio in ${audio.source} content: ${audio.url}`;
                                task.output = `Updating post "${post.title}" with Facebook description: ${audio.url}`;
                            }

                            await ctx.api.posts.edit({
                                id: post.id,
                                og_description: audio.url,
                                title: post.title,
                                status: post.status,
                                updated_at: post.updated_at
                            });
                            ctx.updated = ctx.updated + 1;

                            if (options.verbose) {
                                task.output = `Successfully updated post "${post.title}" with Facebook description: ${audio.url}`;
                            }
                        } else if (options.verbose) {
                            task.output = `No audio found in post "${post.title}"`;
                        }

                        // Only posts that completed without an error are counted.
                        ctx.processed = ctx.processed + 1;
                    } catch (error) {
                        ctx.errors.push(`Error processing post "${post.title}": ${error.message}`);
                        if (options.verbose) {
                            task.output = `Error processing post "${post.title}": ${error.message}`;
                        }
                    }

                    // Add delay between API calls. This sits outside the try block so
                    // that a failed call is throttled too; otherwise a run of failures
                    // would hit the API back-to-back.
                    if (ctx.args.delayBetweenCalls > 0) {
                        await new Promise((resolve) => {
                            setTimeout(resolve, ctx.args.delayBetweenCalls);
                        });
                    }
                }

                task.output = `Processed ${ctx.processed} posts, updated ${ctx.updated} with Facebook descriptions`;
            }
        }
    ];
};
258+
259+
/**
 * Creates the task runner for the set-podcast tool.
 *
 * @param {object} options - command options; forwarded to the task list and,
 *   together with `topLevel: true`, to `makeTaskRunner`
 * @returns {object} whatever `makeTaskRunner` returns for the full task list
 */
const getTaskRunner = (options) => {
    const taskList = getFullTaskList(options);
    const runnerOptions = {topLevel: true, ...options};

    return makeTaskRunner(taskList, runnerOptions);
};
264+
265+
export {
266+
extractFirstAudio,
267+
extractFirstAudioFromLexical,
268+
extractFirstAudioFromMobiledoc
269+
};
270+
271+
export default {
272+
initialise,
273+
getFullTaskList,
274+
getTaskRunner
275+
};

0 commit comments

Comments
 (0)