archive parser/backend/frontend progress
ci / build (push) Waiting to run Details

This commit is contained in:
CJ_Clippy 2024-06-01 20:35:48 -08:00
parent 2faa8cfa21
commit b70db6d09d
11 changed files with 192 additions and 51 deletions

View File

@ -7,7 +7,7 @@
"test": "mocha"
},
"exports": {
"fansly": "./src/fansly.js"
"./fansly": "./src/fansly.js"
},
"keywords": [],
"author": "@CJ_Clippy",

View File

@ -1,10 +1,23 @@
const fansly = {
regex: {
/**
 * Regular expressions for recognizing Fansly URLs.
 *
 * `username` matches a https://fansly.com/ URL, with or without the `live/`
 * prefix, and captures the first path segment that follows it.
 */
const regex = {
  username: new RegExp(/^https:\/\/fansly\.com\/(?:live\/)?([^\/]+)/)
}

/**
 * Build the channel URL for a Fansly username.
 *
 * @param {String} username example: projektmelody
 * @returns {String} example: https://fansly.com/projektmelody
 */
const fromUsername = (username) => `https://fansly.com/${username}`

/**
 * Convert any recognized Fansly URL (including `/live/` URLs) into the plain
 * channel URL.
 *
 * @param {String} url example: https://fansly.com/live/projektmelody
 * @returns {String} example: https://fansly.com/projektmelody
 * @throws {Error} when url is falsy, or when it does not match `regex.username`
 */
const normalize = (url) => {
  if (!url) throw new Error('normalized received a null or undefined url.');
  // exec() returns null when there is no match; fail with a readable message
  // instead of a TypeError from dereferencing null.
  const match = regex.username.exec(url)
  if (!match) throw new Error(`normalize received a url which is not a fansly.com channel url: ${url}`)
  return fromUsername(match[1])
}

// URL helpers, grouped so callers can use `fansly.url.normalize(...)`.
const url = {
  normalize,
  fromUsername
}

const fansly = {
  regex,
  url
}

export default fansly

View File

@ -1,5 +1,6 @@
import { expect } from 'chai'
import fansly from './fansly.js'
import { describe } from 'mocha'
describe('fansly', function () {
describe('regex', function () {
@ -13,4 +14,21 @@ describe('fansly', function () {
})
})
})
describe('url', function () {
  describe('fromUsername', function () {
    // Each pair is [channel name, expected channel URL].
    const usernameCases = [
      ['projektmelody', 'https://fansly.com/projektmelody'],
      ['GoodKittenVR', 'https://fansly.com/GoodKittenVR'],
      ['MzLewdieB', 'https://fansly.com/MzLewdieB'],
      ['340602399334871040', 'https://fansly.com/340602399334871040']
    ]
    it('should accept a channel name and give us a valid channel URL', function () {
      for (const [channelName, expectedUrl] of usernameCases) {
        expect(fansly.url.fromUsername(channelName)).to.equal(expectedUrl)
      }
    })
  })
  describe('normalize', function () {
    // Each pair is [live URL, expected channel URL].
    const liveUrlCases = [
      ['https://fansly.com/live/projektmelody', 'https://fansly.com/projektmelody'],
      ['https://fansly.com/live/340602399334871040', 'https://fansly.com/340602399334871040'],
      ['https://fansly.com/live/GoodKittenVR', 'https://fansly.com/GoodKittenVR']
    ]
    it('should accept a live URL and return a normal channel url.', function () {
      for (const [liveUrl, expectedUrl] of liveUrlCases) {
        expect(fansly.url.normalize(liveUrl)).to.equal(expectedUrl)
      }
    })
  })
})
})

1
packages/pg-pubsub Submodule

@ -0,0 +1 @@
Subproject commit 02e159182d462d17866f5dee720c315781c2bdec

32
packages/scout/src/cb.js Normal file
View File

@ -0,0 +1,32 @@
import cheerio from 'cheerio'
/**
 * Fetch a room page and extract the `window.initialRoomDossier` object from it.
 *
 * One token is removed from the limiter before the request is made; an error
 * thrown by the limiter propagates to the caller. Every failure after that
 * point (network error, non-2xx response, missing script, unparsable payload)
 * is logged and reported as `null` instead of being thrown.
 *
 * @param {Object} limiter An instance of node-rate-limiter, see https://github.com/jhurliman/node-rate-limiter
 * @param {String} roomUrl example: https://chaturbate.com/projektmelody
 * @returns {Promise<Object|null>} initialRoomDossier, or null when it could not be fetched or parsed
 */
export async function getInitialRoomDossier(limiter, roomUrl) {
  await limiter.removeTokens(1);
  try {
    const res = await fetch(roomUrl, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
      }
    });
    // Without this check an error page (404, 429, ...) would be parsed and
    // misreported below as a missing dossier.
    if (!res.ok) {
      throw new Error(`request for ${roomUrl} failed with HTTP status ${res.status}`);
    }
    const body = await res.text();
    const $ = cheerio.load(body);
    const rawScript = $('script:contains(window.initialRoomDossier)').html();
    if (!rawScript) {
      throw new Error('window.initialRoomDossier is null. This could mean the channel is in password mode');
    }
    // Take everything from the first to the last double quote, then parse
    // twice: once to decode the quoted string literal, and once to decode the
    // JSON document that string contains.
    const rawDossier = rawScript.slice(rawScript.indexOf('"'), rawScript.lastIndexOf('"') + 1);
    return JSON.parse(JSON.parse(rawDossier));
  } catch (error) {
    // Best effort: callers receive null rather than an exception.
    console.log(`Error fetching initial room dossier: ${error.message}`);
    return null;
  }
}

View File

@ -0,0 +1,14 @@
import { describe } from 'mocha'
import { expect } from 'chai';
import { getInitialRoomDossier } from './cb.js'
import { RateLimiter } from "limiter";
describe('cb', function () {
  // One limiter shared by every test in this suite.
  const limiter = new RateLimiter({ tokensPerInterval: 10, interval: "minute" })
  describe('getInitialRoomDossier', function () {
    it('should return json', async function () {
      // This test performs a real HTTP request, which can easily take longer
      // than mocha's default 2000ms timeout.
      this.timeout(30000)
      const dossier = await getInitialRoomDossier(limiter, 'https://chaturbate.com/projektmelody')
      // getInitialRoomDossier returns null on failure; assert that explicitly
      // so a failed fetch produces a readable message.
      expect(dossier, 'getInitialRoomDossier returned null (fetch or parse failed)').to.not.equal(null)
      expect(dossier).to.have.property('wschat_host')
    })
  })
})

View File

@ -18,14 +18,14 @@ async function handleMessage({email, msg}) {
const body = await email.loadMessage(msg.uid)
console.log(' ✏️ checking e-mail')
const { isMatch, url, platform, channel, displayName, date } = (await checkEmail(body))
const { isMatch, url, platform, channel, displayName, date, userId } = (await checkEmail(body))
if (isMatch) {
console.log(' ✏️✏️ signalling realtime')
await signalRealtime({ url, platform, channel, displayName, date })
await signalRealtime({ url, platform, channel, displayName, date, userId })
console.log(' ✏️✏️ creating stream entry in db')
await createStreamInDb({ source: 'email', platform, channel, date, url })
await createStreamInDb({ source: 'email', platform, channel, date, url, userId })
}
console.log(' ✏️ archiving e-mail')

View File

@ -14,11 +14,12 @@ const definitions = [
{
platform: 'fansly',
selectors: {
url: ($) => $("a[href*='/live/']").attr('href'),
displayName: 'div[class*="message-col"] div:nth-child(5)'
channel: ($) => $("a[href*='/live/']").attr('href').toString().split('/').at(-1),
displayName: 'div[class*="message-col"] div:nth-child(5)',
userId: ($) => $("img[src*='/api/v1/account/']").attr('src').toString().split('/').at(-2)
},
from: 'no-reply@fansly.com',
template: 'https://fansly.com/live/:channel',
template: 'https://fansly.com/:channel',
regex: /https:\/\/fansly.com\/live\/([a-zA-Z0-9_]+)/
}
]
@ -69,12 +70,16 @@ export async function checkEmail (body) {
res[s] = (def.selectors[s] instanceof Object) ? def.selectors[s]($) : $(def.selectors[s]).text()
}
// console.log(`res.url=${res.url}`)
// Step 2, get values using regex & templates
res.channel = (() => {
if (res.channel) return res.channel;
if (def.regex && res.url) return def.regex.exec(res.url).at(1);
})()
res.userId = res.userId || null
res.url = res.url || render(def.template, { channel: res.channel })

View File

@ -8,23 +8,25 @@ const __dirname = import.meta.dirname;
describe('parsers', function () {
describe('checkEmail', function () {
it('should detect fansly e-mails', async function () {
it('should detect fansly e-mails and return channel data', async function () {
const mailBody = await fs.readFile(path.join(__dirname, './fixtures/fansly.fixture.txt'), { encoding: 'utf8' })
const { isMatch, channel, platform, url, date } = await checkEmail(mailBody)
const { isMatch, channel, platform, url, date, userId } = await checkEmail(mailBody)
expect(isMatch).to.equal(true, 'a Fansly heuristic was not found')
expect(platform).to.equal('fansly')
expect(channel).to.equal('SkiaObsidian')
expect(url).to.equal('https://fansly.com/live/SkiaObsidian')
expect(url).to.equal('https://fansly.com/SkiaObsidian')
expect(date).to.equal('2024-05-05T03:04:33.000Z')
expect(userId).to.equal('555722198917066752')
})
it('should detect cb e-mails', async function () {
it('should detect cb e-mails and return channel data', async function () {
const mailBody = await fs.readFile(path.join(__dirname, './fixtures/chaturbate.fixture.txt'), { encoding: 'utf8' })
const { isMatch, channel, platform, url, date } = await checkEmail(mailBody)
const { isMatch, channel, platform, url, date, userId } = await checkEmail(mailBody)
expect(isMatch).to.equal(true, 'a CB heuristic was not found')
expect(platform).to.equal('chaturbate')
expect(channel).to.equal('skyeanette')
expect(url).to.equal('https://chaturbate.com/skyeanette')
expect(date).to.equal('2023-07-24T01:08:28.000Z')
expect(userId).to.equal(null) // this info is not in the CB e-mail
})
})
})

View File

@ -50,7 +50,7 @@ export async function signalRealtime ({ url, platform, channel, displayName, dat
*
* It's a 3 step process, with each step outlined in the function body.
*/
export async function createStreamInDb ({ source, platform, channel, date, url }) {
export async function createStreamInDb ({ source, platform, channel, date, url, userId }) {
let vtuberId, streamId
@ -61,12 +61,21 @@ export async function createStreamInDb ({ source, platform, channel, date, url }
// If the vtuber is not in the db, we create the vtuber record.
// GET /api/:pluralApiId?filters[field][operator]=value
const findVtubersQueryString = qs.stringify({
filters: {
chaturbate: (platform === 'chaturbate') ? { '$eq': url } : null,
fansly: (platform === 'fansly') ? { '$eq': url } : null
const findVtubersFilters = (() => {
if (platform === 'chaturbate') {
return { chaturbate: { $eq: url } }
} else if (platform === 'fansly') {
if (!userId) throw new Error('Fansly userId was undefined, but it is required.')
return { fanslyId: { $eq: userId } }
}
})
})()
console.log('>>>>> the following is findVtubersFilters.')
console.log(findVtubersFilters)
const findVtubersQueryString = qs.stringify({
filters: findVtubersFilters
}, { encode: false })
console.log(`>>>>> platform=${platform}, url=${url}, userId=${userId}`)
console.log('>> findVtuber')
const findVtuberRes = await fetch(`${process.env.STRAPI_URL}/api/vtubers?${findVtubersQueryString}`, {
@ -78,10 +87,11 @@ export async function createStreamInDb ({ source, platform, channel, date, url }
const findVtuberJson = await findVtuberRes.json()
if (findVtuberJson.data.length > 0) {
console.log('>>a vtuber was FOUND')
vtuberId = findVtuberJson.data.id
if (findVtuberJson.data.length > 1) throw new Error('There was more than one vtuber match. There must only be one.')
vtuberId = findVtuberJson.data[0].id
console.log('here is the findVtuberJson (as follows)')
console.log(findVtuberJson)
console.log(`the matching vtuber has ID=${vtuberId} (${findVtuberJson.data.attributes.displayName})`)
console.log(`the matching vtuber has ID=${vtuberId} (${findVtuberJson.data[0].attributes.displayName})`)
}
if (!vtuberId) {
@ -94,19 +104,20 @@ export async function createStreamInDb ({ source, platform, channel, date, url }
},
body: JSON.stringify({
data: {
'displayName': channel,
'fansly': (platform === 'fansly') ? url : null,
'chaturbate': (platform === 'chaturbate') ? url : null,
'slug': slugify(channel),
'description1': ' ',
'image': 'https://placehold.co/200x200.png',
'themeColor': '#dde1ec'
displayName: channel,
fansly: (platform === 'fansly') ? url : null,
fanslyId: (platform === 'fansly') ? userId : null,
chaturbate: (platform === 'chaturbate') ? url : null,
slug: slugify(channel),
description1: ' ',
image: 'https://futureporn-b2.b-cdn.net/200x200.png',
themeColor: '#dde1ec'
}
})
})
const createVtuberJson = await createVtuberRes.json()
console.log('>> createVtuberJson as follows')
console.log(createVtuberJson)
console.log(JSON.stringify(createVtuberJson, null, 2))
if (createVtuberJson.data) {
vtuberId = createVtuberJson.data.id
console.log(`>>> vtuber created with id=${vtuberId}`)
@ -159,16 +170,11 @@ export async function createStreamInDb ({ source, platform, channel, date, url }
// qs.stringify({
// populate: 'vtuber',
// populate: '*',
// filters: {
// date: {
// "$eq": '2024-01-09T08:00:00.000Z'
// isFanslyStream: {
// "$eq": true
// },
// vtuber: {
// id: {
// '$eq': 1
// }
// }
// }
// }, {
// encode: false

View File

@ -10,17 +10,33 @@ module.exports = {
const cuid = init({ length });
event.params.data.cuid = cuid();
}
/**
* Here we set the stream platform based on related platformNotifications.
* For example, if there is a related fansly platformNotification, we set isFanslyStream to true.
*/
console.log('hello my good sir, we are about to set the stream platform based on related platformNotifications.')
console.log('in order to make sure we have the data we need, let us console log the data.')
console.log(data)
},
async afterUpdate(event) {
console.log(`>>>>>>>>>>>>>> STREAM is afterUpdate !!!!!!!!!!!!`);
const { data, where, select, populate } = event.params;
console.log(data);
const id = where.id;
// greets https://forum.strapi.io/t/how-to-get-previous-component-data-in-lifecycle-hook/25892/4?u=ggr247
/**
* This is where we populate the archiveStatus, based on the vods we have (or do not have.)
* We do this to display to the visitor the archival state of this stream.
* This state is what populates the any% archival speedrun on the `/vt/:slug` pages.
*
* Vods with a note are automatically considered, 'issue'
* A stream with no vods is considered, 'missing'
* At least 1 vod with no notes is considered, 'good'
*
* greets https://forum.strapi.io/t/how-to-get-previous-component-data-in-lifecycle-hook/25892/4?u=ggr247
*/
const existingData = await strapi.entityService.findOne("api::stream.stream", id, {
populate: ['vods', 'tweet']
})
@ -49,13 +65,47 @@ module.exports = {
archive_status: archiveStatus,
});
if (!!existingData.tweet) {
await strapi.db.connection("streams").where({ id: id }).update({
is_chaturbate_stream: existingData.tweet.isChaturbateInvite,
is_fansly_stream: existingData.tweet.isFanslyInvite
});
/**
* This is where we populate platform, based on the related platformNotification content-types.
* We do this so the UI has the data it needs to display the platform on which the stream took place.
*
* If any platformNotification is from fansly, isFanslyStream is set to true.
* If any platformNotification is from chaturbate, isChaturbateStream is set to true.
*/
const existingData2 = await strapi.entityService.findOne("api::stream.stream", id, {
populate: ['platformNotifications']
})
let isFanslyStream = false
let isChaturbateStream = false
// Iterate through all platformNotifications to determine which platform flags to set
for (const pn of existingData2.platformNotifications) {
if (pn.platform === 'fansly') {
isFanslyStream = true
} else if (pn.platform === 'chaturbate') {
isChaturbateStream = true
}
}
// We can't use the query engine here, because that would trigger an infinite loop
// (presumably the update would fire this afterUpdate hook again -- TODO confirm).
// Instead, we access the knex instance directly.
await strapi.db.connection("streams").where({ id: id }).update({
is_fansly_stream: isFanslyStream,
is_chaturbate_stream: isChaturbateStream
});
// Old way, @deprecated. keeping as a comment until I'm sure I don't need it
// if (!!existingData.tweet) {
// await strapi.db.connection("streams").where({ id: id }).update({
// is_chaturbate_stream: existingData.tweet.isChaturbateInvite,
// is_fansly_stream: existingData.tweet.isFanslyInvite
// });
// }
}
};