Here it must fetch every url inside <li> tags with disc type in initial webpage. The html of initial webpage is roughly this:
<body>
<li type="disc">
<a href="url1">Lorem.</a>
<a href="url2">Dolor</a>
<a href="LAST_URL3">In!</a>
</li>
<li type="disc">
<a href="url">Accusantium.</a>
<a href="url">Dolor</a>
<a href="url">Eveniet!</a>
</li>
<li type="disc">
<a href="url">Asperiores?</a>
<a href="url">Dolor</a>
<a href="url">Suscipit!</a>
</li>
<li type="disc">
<a href="url">Excepturi?</a>
<a href="url">Dolor</a>
<a href="url">Temporibus!</a>
</li>
</body>
Javascript:
const w = await mani.metax.get(work).then(r => r.json())
const url = w.source
const response = await fetch( + url)
const html = await response.text()
async function parse() {
const parser = new DOMParser()
const doc = parser.parseFromString(html, 'text/html')
booksNames = w.booksNames.split(', ')
const booksList = url.includes('bible') ? doc.querySelectorAll('div.arm') : doc.querySelectorAll('li[type="disc"]')
for (const book of booksList) {
let bookObj = {"name": booksNames[0], "type": "7d998386-e38f-4d73-97f3-db9f1d7641d8-bf1523bf-09f1-4822-b546-9a10a8263ceb"}
let bookObjStr = JSON.stringify(bookObj); booksNames.shift()
let uuid = await mani.metax.save(bookObjStr)
const b = await mani.metax.get(uuid).then(r => r.json()); b.uuid = uuid; b.chapters = []
const chaptersList = Array.from(book.querySelectorAll('a'));
let hrefsList = chaptersList.map(({href}) => ({href: href.replace('https://realschool.am:542/db/', url), done:false, doc:""}));
let current
const getData = async () => {
if (hrefsList.length === 0) {
let versesList = Array.from(current.doc.querySelectorAll('table:nth-of-type(3) td:nth-of-type(1)'))
let chapObj = {"type": "4e680ac1-c1ac-408b-af50-103843ad1054-7b348fbb-0890-418d-b176-e090b76a8c50"}
let chapObjStr = JSON.stringify(chapObj)
let chapUUID = await mani.metax.save(chapObjStr)
const c = await mani.metax.get(chapUUID).then(r => r.json()); c.uuid = chapUUID; c.verses = []
c.name = versesList[0].innerText.trim().replace('``', '').slice(4); versesList.shift()
for (let verse of versesList) {
let text = verse.innerText.trim().replace('``', '')
let obj = {"name": text.split(' ')[0], "type": "06408198-7666-43f4-b2eb-6de5715434f9-f801e312-0957-4429-871b-7d448364c65c"}
let objStr = JSON.stringify(obj)
let verseUUID = await mani.metax.save(objStr)
const v = await mani.metax.get(verseUUID).then(r => r.json()); v.uuid = verseUUID; v.text = text
await mani.metax.update(verseUUID, JSON.stringify(v))
c.verses.push(verseUUID)
}
await mani.metax.update(chapUUID, JSON.stringify(c))
b.chapters.push(chapUUID) // Adding new object in a collection
} else {
current = hrefsList[0];
const res = fetch(current.href).then(response => response.text()).then(text => {
const parser = new DOMParser()
current.doc = parser.parseFromString(html, 'text/html')
}).then(() => {
current.done = true;
console.log(current.href,'parsed successfully');
hrefsList = hrefsList.filter(({done}) => !done);
setTimeout(getData, 2000); // try next (or again)
})
}
}
getData();
let versesList = Array.from(doc.querySelectorAll('table:nth-of-type(3) td:nth-of-type(1)'))
let chapObj = {"type": "4e680ac1-c1ac-408b-af50-103843ad1054-7b348fbb-0890-418d-b176-e090b76a8c50"}
let chapObjStr = JSON.stringify(chapObj)
let chapUUID = await mani.metax.save(chapObjStr)
const c = await mani.metax.get(chapUUID).then(r => r.json()); c.uuid = chapUUID; c.verses = []
c.name = versesList[0].innerText.trim().replace('``', '').slice(4); versesList.shift()
//}
await mani.metax.update(uuid, JSON.stringify(b))
w.books.push(uuid) // Adding new object in a collection
await mani.metax.update(work, JSON.stringify(w)).then(r => alert("New book complete"))
}
}
But what it actually does is fetching every url from only first <li>, and after fetching the LAST_URL3 it causes Uncaught (in promise) TypeError: Cannot read properties of undefined (reading 'innerText'). So it seems it tries to fetch more urls from <li> and as there are no more it just fetches undefined. It must iterate and start to fetch urls already from second <li> but it doesn't. Even more, if we'll output to console any test message after calling the function getData(), it will be outputed and will be outputed before (some url) parsed successfully messages which are outputed after successful fetch of every url. Mustn't test message be outputed after parsed successfully if we called function earlier than outputed test? And if it doesn't stuck at function and moves on to the rest loop content, then why loop doesn't iterate?