Refactor packages

Remove ExtractionService
This commit is contained in:
2025-06-26 12:48:19 +02:00
parent edc604231f
commit 052bdd6a52
6 changed files with 8 additions and 285 deletions

View File

@@ -1,273 +0,0 @@
package com.rak.service
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.Selector
import com.rak.model.card.Card
import com.rak.model.card.CardPrint
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.model.transform.TransformationRegistry
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.util.Optional
@ApplicationScoped
class ExtractionService(
private val sourceService: SourceService,
) {
private val transformationRegistry = TransformationRegistry()
fun extractSet(setName: String, root: Element, provider: String): CardSet {
return CardSet(
name = setName,
regionalSets = extractRegionalSets(root, provider)
)
}
fun getRootElement(
document: Document,
globalRootExtractConfig: Optional<ExtractConfig>,
nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElse(
nodeRootExtractConfig.orElseThrow {
InvalidConfigurationException("")
})
return getElementFromDocumentByExtractConfig(document, rootExtractConfig) ?: throw ElementNotFoundException("No root could be found")
}
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
val cardName = extractTextFromElementByTargetFieldConfig(
getRootElement(
document,
cardPrintConfig.getRootConfig(),
cardPrintConfig.getNameConfig().getRootConfig()
),
cardPrintConfig.getNameConfig()
)
return null
}
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val setExtractionConfig = source.getTargets().getSetConfig().get()
if (setExtractionConfig.getRootConfig().isPresent) {
val setId: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getIdConfig(),
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguage: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getLanguageConfig()
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKey: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getRegionKeyConfig()
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId,
setLanguage,
setKey,
listOf(),
-1
)
} else {
val setIdConfiguration = setExtractionConfig.getIdConfig()
val rootConfiguration = setIdConfiguration.getRootConfig().get()
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setId: String = extractTextFromElementByTargetFieldConfig(
setIdRoot,
setIdConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguageConfiguration = setExtractionConfig.getIdConfig()
val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setLanguage: String = extractTextFromElementByTargetFieldConfig(
setLanguageRoot,
setLanguageConfiguration
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKeyConfiguration = setExtractionConfig.getIdConfig()
val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setKey: String = extractTextFromElementByTargetFieldConfig(
setKeyRoot,
setKeyConfiguration
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId,
setLanguage,
setKey,
listOf(),
-1
)
}
}
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val setExtractionConfig = source.getTargets().getSetConfig().get()
if (setExtractionConfig.getRootConfig().isPresent) {
val rootConfiguration = setExtractionConfig.getRootConfig().get()
val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(
root,
rootConfiguration
)
return regionalSetRoots.map {
extractRegionalSet(
it,
provider
)
}.toSet()
} else {
try {
val setIdConfiguration = setExtractionConfig.getIdConfig()
val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
val setIds = setIdRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
setIdConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val languageConfiguration = setExtractionConfig.getLanguageConfig()
val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
val languages = languageRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
languageConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val setKeyConfiguration = setExtractionConfig.getRegionKeyConfig()
val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
val setKeys = setKeyRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
setKeyConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
return RegionalSet.flattenFromMemberLists(
setIds,
languages,
setKeys
)
} catch (ex: NoSuchElementException) {
throw RuntimeException("sdfgs") // TODO handle me
}
}
}
fun extractCard(root: Document, provider: String): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val cardSelector = source.getTargets().getCardConfig().get()
val rootConfigurationOptional = cardSelector.getRootConfig()
if (rootConfigurationOptional.isPresent) {
val rootConfiguration = rootConfigurationOptional.get()
val rootElement: Element = getElementFromDocumentByExtractConfig(
root,
rootConfiguration
) ?: throw ElementNotFoundException("TODO make this better")
val englishCardName: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
val cardType: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
val description: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
return null
} else {
return null
}
}
private fun getElementsFromDocumentByExtractConfig(
document: Element,
step: ExtractConfig
): Elements {
return if (step.selectorType() == Selector.CSS) {
document.select(step.getQueryString())
} else {
document.selectXpath(step.getQueryString())
}
}
private fun getElementFromDocumentByExtractConfig(
document: Element,
step: ExtractConfig,
): Element? {
return if (step.selectorType() == Selector.CSS) {
document.select(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("")
} else {
document.selectXpath(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("")
}
}
private fun extractTextFromElementByTargetFieldConfig(
root: Element,
extractionConfig: ScrapeTargetFieldConfig
): String? {
val extractionSteps = extractionConfig.getExtractionSteps()
val transformationSteps = extractionConfig.getOptionalTransformationSteps()
var currentElement: Element? = root.clone()
var result: String? = null
for (index in 0 until extractionSteps.size) {
val currentStep = extractionSteps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == extractionSteps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.getQueryString())
}
else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
}
}
if (result == null) {
throw ElementNotFoundException("Result could not be extracted")
}
if (transformationSteps.isPresent) {
result = transformationRegistry.applyTransformations(result, transformationSteps.get())
}
return result
}
}

View File

@@ -1,8 +1,11 @@
package com.rak.service
import com.rak.model.card.Card
import com.rak.model.exception.NotImplementedException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.service.extract.RegionalSetExtractionService
import com.rak.service.extract.SetExtractionService
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
@@ -10,7 +13,6 @@ import org.jsoup.nodes.Document
@ApplicationScoped
class ScrapeService(
private val sourceService: SourceService,
private val extractionService: ExtractionService,
private val setExtractionService: SetExtractionService,
private val regionalSetExtractionService: RegionalSetExtractionService
) {
@@ -25,7 +27,6 @@ class ScrapeService(
val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
// return extractionService.extractSet(setName, document, provider)
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
}
@@ -45,12 +46,8 @@ class ScrapeService(
provider: String,
cardName: String,
): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
throw NotImplementedException("Not implemented")
val path: String = normalizePath(cardName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractCard(document, provider)
}
private fun normalizePath(path: String): String = path

View File

@@ -1,10 +1,9 @@
package com.rak.service
package com.rak.service.extract
import com.rak.config.model.AbstractScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.TransformationStepConfig
import com.rak.model.Selector
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException

View File

@@ -1,4 +1,4 @@
package com.rak.service
package com.rak.service.extract
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ProviderConfig

View File

@@ -1,4 +1,4 @@
package com.rak.service
package com.rak.service.extract
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig

View File

@@ -1,4 +1,4 @@
package com.rak.service
package com.rak.service.extract
import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig