Refactor packages

Remove ExtractionService
This commit is contained in:
2025-06-26 12:48:19 +02:00
parent edc604231f
commit 052bdd6a52
6 changed files with 8 additions and 285 deletions

View File

@@ -1,273 +0,0 @@
package com.rak.service
import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ExtractConfig
import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.model.Selector
import com.rak.model.card.Card
import com.rak.model.card.CardPrint
import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException
import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet
import com.rak.model.transform.TransformationRegistry
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import java.util.Optional
@ApplicationScoped
class ExtractionService(
private val sourceService: SourceService,
) {
private val transformationRegistry = TransformationRegistry()
fun extractSet(setName: String, root: Element, provider: String): CardSet {
return CardSet(
name = setName,
regionalSets = extractRegionalSets(root, provider)
)
}
fun getRootElement(
document: Document,
globalRootExtractConfig: Optional<ExtractConfig>,
nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElse(
nodeRootExtractConfig.orElseThrow {
InvalidConfigurationException("")
})
return getElementFromDocumentByExtractConfig(document, rootExtractConfig) ?: throw ElementNotFoundException("No root could be found")
}
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
val cardName = extractTextFromElementByTargetFieldConfig(
getRootElement(
document,
cardPrintConfig.getRootConfig(),
cardPrintConfig.getNameConfig().getRootConfig()
),
cardPrintConfig.getNameConfig()
)
return null
}
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val setExtractionConfig = source.getTargets().getSetConfig().get()
if (setExtractionConfig.getRootConfig().isPresent) {
val setId: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getIdConfig(),
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguage: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getLanguageConfig()
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKey: String = extractTextFromElementByTargetFieldConfig(
root,
setExtractionConfig.getRegionKeyConfig()
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId,
setLanguage,
setKey,
listOf(),
-1
)
} else {
val setIdConfiguration = setExtractionConfig.getIdConfig()
val rootConfiguration = setIdConfiguration.getRootConfig().get()
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setId: String = extractTextFromElementByTargetFieldConfig(
setIdRoot,
setIdConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
val setLanguageConfiguration = setExtractionConfig.getIdConfig()
val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setLanguage: String = extractTextFromElementByTargetFieldConfig(
setLanguageRoot,
setLanguageConfiguration
) ?: throw IllegalStateException("Parameter 'language' could not be found")
val setKeyConfiguration = setExtractionConfig.getIdConfig()
val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
val setKey: String = extractTextFromElementByTargetFieldConfig(
setKeyRoot,
setKeyConfiguration
) ?: throw IllegalStateException("Parameter 'key' could not be found")
return RegionalSet(
setId,
setLanguage,
setKey,
listOf(),
-1
)
}
}
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val setExtractionConfig = source.getTargets().getSetConfig().get()
if (setExtractionConfig.getRootConfig().isPresent) {
val rootConfiguration = setExtractionConfig.getRootConfig().get()
val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(
root,
rootConfiguration
)
return regionalSetRoots.map {
extractRegionalSet(
it,
provider
)
}.toSet()
} else {
try {
val setIdConfiguration = setExtractionConfig.getIdConfig()
val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
val setIds = setIdRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
setIdConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val languageConfiguration = setExtractionConfig.getLanguageConfig()
val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
val languages = languageRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
languageConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
val setKeyConfiguration = setExtractionConfig.getRegionKeyConfig()
val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
val setKeys = setKeyRoot.map {
extractTextFromElementByTargetFieldConfig(
it,
setKeyConfiguration
) ?: throw IllegalStateException("Parameter 'id' could not be found")
}
return RegionalSet.flattenFromMemberLists(
setIds,
languages,
setKeys
)
} catch (ex: NoSuchElementException) {
throw RuntimeException("sdfgs") // TODO handle me
}
}
}
fun extractCard(root: Document, provider: String): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val cardSelector = source.getTargets().getCardConfig().get()
val rootConfigurationOptional = cardSelector.getRootConfig()
if (rootConfigurationOptional.isPresent) {
val rootConfiguration = rootConfigurationOptional.get()
val rootElement: Element = getElementFromDocumentByExtractConfig(
root,
rootConfiguration
) ?: throw ElementNotFoundException("TODO make this better")
val englishCardName: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
val cardType: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
val description: String = extractTextFromElementByTargetFieldConfig(
rootElement,
cardSelector.getEnglishNameConfig()
) ?: throw IllegalStateException("Parameter 'name' could not be found")
return null
} else {
return null
}
}
private fun getElementsFromDocumentByExtractConfig(
document: Element,
step: ExtractConfig
): Elements {
return if (step.selectorType() == Selector.CSS) {
document.select(step.getQueryString())
} else {
document.selectXpath(step.getQueryString())
}
}
private fun getElementFromDocumentByExtractConfig(
document: Element,
step: ExtractConfig,
): Element? {
return if (step.selectorType() == Selector.CSS) {
document.select(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("")
} else {
document.selectXpath(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("")
}
}
private fun extractTextFromElementByTargetFieldConfig(
root: Element,
extractionConfig: ScrapeTargetFieldConfig
): String? {
val extractionSteps = extractionConfig.getExtractionSteps()
val transformationSteps = extractionConfig.getOptionalTransformationSteps()
var currentElement: Element? = root.clone()
var result: String? = null
for (index in 0 until extractionSteps.size) {
val currentStep = extractionSteps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == extractionSteps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.getQueryString())
}
else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString())
}
}
if (result == null) {
throw ElementNotFoundException("Result could not be extracted")
}
if (transformationSteps.isPresent) {
result = transformationRegistry.applyTransformations(result, transformationSteps.get())
}
return result
}
}

View File

@@ -1,8 +1,11 @@
package com.rak.service package com.rak.service
import com.rak.model.card.Card import com.rak.model.card.Card
import com.rak.model.exception.NotImplementedException
import com.rak.model.set.CardSet import com.rak.model.set.CardSet
import com.rak.model.set.RegionalSet import com.rak.model.set.RegionalSet
import com.rak.service.extract.RegionalSetExtractionService
import com.rak.service.extract.SetExtractionService
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
@@ -10,7 +13,6 @@ import org.jsoup.nodes.Document
@ApplicationScoped @ApplicationScoped
class ScrapeService( class ScrapeService(
private val sourceService: SourceService, private val sourceService: SourceService,
private val extractionService: ExtractionService,
private val setExtractionService: SetExtractionService, private val setExtractionService: SetExtractionService,
private val regionalSetExtractionService: RegionalSetExtractionService private val regionalSetExtractionService: RegionalSetExtractionService
) { ) {
@@ -25,7 +27,6 @@ class ScrapeService(
val path: String = normalizePath(setName) val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
// return extractionService.extractSet(setName, document, provider)
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
} }
@@ -45,12 +46,8 @@ class ScrapeService(
provider: String, provider: String,
cardName: String, cardName: String,
): Card? { ): Card? {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") throw NotImplementedException("Not implemented")
val path: String = normalizePath(cardName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
return extractionService.extractCard(document, provider)
} }
private fun normalizePath(path: String): String = path private fun normalizePath(path: String): String = path

View File

@@ -1,10 +1,9 @@
package com.rak.service package com.rak.service.extract
import com.rak.config.model.AbstractScrapeTargetConfig import com.rak.config.model.AbstractScrapeTargetConfig
import com.rak.config.model.ExtractConfig import com.rak.config.model.ExtractConfig
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.ScrapeTargetFieldConfig
import com.rak.config.model.TransformationStepConfig
import com.rak.model.Selector import com.rak.model.Selector
import com.rak.model.exception.ElementNotFoundException import com.rak.model.exception.ElementNotFoundException
import com.rak.model.exception.InvalidConfigurationException import com.rak.model.exception.InvalidConfigurationException

View File

@@ -1,4 +1,4 @@
package com.rak.service package com.rak.service.extract
import com.rak.config.model.CardPrintScrapeTargetConfig import com.rak.config.model.CardPrintScrapeTargetConfig
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig

View File

@@ -1,4 +1,4 @@
package com.rak.service package com.rak.service.extract
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.ScrapeTargetFieldConfig

View File

@@ -1,4 +1,4 @@
package com.rak.service package com.rak.service.extract
import com.rak.config.model.ProviderConfig import com.rak.config.model.ProviderConfig
import com.rak.config.model.ScrapeTargetFieldConfig import com.rak.config.model.ScrapeTargetFieldConfig