Refactor packages
Remove ExtractionService
This commit is contained in:
@@ -1,273 +0,0 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||
import com.rak.config.model.ExtractConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.model.Selector
|
||||
import com.rak.model.card.Card
|
||||
import com.rak.model.card.CardPrint
|
||||
import com.rak.model.exception.ElementNotFoundException
|
||||
import com.rak.model.exception.InvalidConfigurationException
|
||||
import com.rak.model.set.CardSet
|
||||
import com.rak.model.set.RegionalSet
|
||||
import com.rak.model.transform.TransformationRegistry
|
||||
import com.rak.util.XPathUtil
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.select.Elements
|
||||
import java.util.Optional
|
||||
|
||||
@ApplicationScoped
|
||||
class ExtractionService(
|
||||
private val sourceService: SourceService,
|
||||
) {
|
||||
|
||||
private val transformationRegistry = TransformationRegistry()
|
||||
|
||||
/**
 * Builds a [CardSet] named [setName] whose regional sets are extracted from [root].
 *
 * @param setName name assigned to the resulting set
 * @param root element that is handed to [extractRegionalSets]
 * @param provider source identifier that is handed to [extractRegionalSets]
 * @return the assembled [CardSet]
 */
fun extractSet(setName: String, root: Element, provider: String): CardSet {
    val regionalSets = extractRegionalSets(root, provider)
    return CardSet(name = setName, regionalSets = regionalSets)
}
|
||||
|
||||
/**
 * Resolves the root element of [document] from the first available root config.
 *
 * The global config wins; the node-level config is only consulted when the global
 * one is absent.
 *
 * @param document document to search in
 * @param globalRootExtractConfig root config shared by the whole scrape target
 * @param nodeRootExtractConfig root config of the individual field, used as fallback
 * @return the element selected by the chosen root config
 * @throws InvalidConfigurationException if neither config is present
 * @throws ElementNotFoundException if the chosen config selects no element
 */
fun getRootElement(
    document: Document,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    // orElseGet defers the fallback: with orElse the argument is evaluated eagerly,
    // so a missing node-level config would throw even when the global config is present.
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElseGet {
        nodeRootExtractConfig.orElseThrow {
            InvalidConfigurationException("Neither a global nor a field-level root extract config is present")
        }
    }

    return getElementFromDocumentByExtractConfig(document, rootExtractConfig)
        ?: throw ElementNotFoundException("No root could be found")
}
|
||||
|
||||
/**
 * Resolves the root element for the card print's name field and runs the name
 * extraction against it.
 *
 * NOTE(review): the extracted name is discarded and the function always returns
 * null, so this looks like an unfinished stub. The extraction is still executed
 * and may throw.
 *
 * @param document document to extract from
 * @param cardPrintConfig scrape configuration of the card print target
 * @return always null
 */
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
    val nameRoot = getRootElement(
        document,
        cardPrintConfig.getRootConfig(),
        cardPrintConfig.getNameConfig().getRootConfig()
    )

    // The result is intentionally unused for now; only the side effects (exceptions) remain.
    extractTextFromElementByTargetFieldConfig(nameRoot, cardPrintConfig.getNameConfig())

    return null
}
|
||||
|
||||
/**
 * Extracts a single [RegionalSet] (id, language, region key) from [root].
 *
 * If the provider's set config declares a shared root config, all three fields are
 * read relative to [root]. Otherwise each field is read relative to the element
 * selected by that field's own root config.
 *
 * @param root element the extraction starts from
 * @param provider id of the source whose set config is used
 * @return the regional set, created with an empty list and -1 as its last two arguments
 * @throws IllegalArgumentException if no source exists for [provider]
 * @throws ElementNotFoundException if a field's root element cannot be located
 * @throws IllegalStateException if a field value cannot be extracted
 * @throws NoSuchElementException if the set config, or a required field root config, is absent
 */
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()

    val sharedRoot = setExtractionConfig.getRootConfig().isPresent

    // Each field uses its OWN config and its OWN root config. Previously language and
    // key were extracted with the id config and the id root (copy-paste error).
    val idConfig = setExtractionConfig.getIdConfig()
    val idRoot: Element = if (sharedRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, idConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'id' could not be found")
    }
    val setId: String = extractTextFromElementByTargetFieldConfig(idRoot, idConfig)
        ?: throw IllegalStateException("Parameter 'id' could not be found")

    val languageConfig = setExtractionConfig.getLanguageConfig()
    val languageRoot: Element = if (sharedRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, languageConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'language' could not be found")
    }
    val setLanguage: String = extractTextFromElementByTargetFieldConfig(languageRoot, languageConfig)
        ?: throw IllegalStateException("Parameter 'language' could not be found")

    val regionKeyConfig = setExtractionConfig.getRegionKeyConfig()
    val regionKeyRoot: Element = if (sharedRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, regionKeyConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'key' could not be found")
    }
    val setKey: String = extractTextFromElementByTargetFieldConfig(regionKeyRoot, regionKeyConfig)
        ?: throw IllegalStateException("Parameter 'key' could not be found")

    return RegionalSet(
        setId,
        setLanguage,
        setKey,
        listOf(),
        -1
    )
}
|
||||
|
||||
/**
 * Extracts all regional sets found below [root].
 *
 * If the provider's set config declares a shared root config, every element it
 * selects is turned into one [RegionalSet] via [extractRegionalSet]. Otherwise the
 * ids, languages and region keys are collected as three separate lists (each via
 * its own field root config) and combined by [RegionalSet.flattenFromMemberLists].
 *
 * @param root element the extraction starts from
 * @param provider id of the source whose set config is used
 * @return the extracted regional sets
 * @throws IllegalArgumentException if no source exists for [provider]
 * @throws IllegalStateException if a field value cannot be extracted
 * @throws RuntimeException if a [NoSuchElementException] occurs while collecting the
 *   per-field lists (e.g. a field root config is absent); the original is kept as cause
 */
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()

    val sharedRootConfig = setExtractionConfig.getRootConfig()
    if (sharedRootConfig.isPresent) {
        val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig(root, sharedRootConfig.get())
        return regionalSetRoots
            .map { extractRegionalSet(it, provider) }
            .toSet()
    }

    return try {
        val setIdConfiguration = setExtractionConfig.getIdConfig()
        val setIds = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
            .map { idRoot ->
                extractTextFromElementByTargetFieldConfig(idRoot, setIdConfiguration)
                    ?: throw IllegalStateException("Parameter 'id' could not be found")
            }

        val languageConfiguration = setExtractionConfig.getLanguageConfig()
        val languages = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
            .map { languageRoot ->
                // Message previously named 'id' for every field.
                extractTextFromElementByTargetFieldConfig(languageRoot, languageConfiguration)
                    ?: throw IllegalStateException("Parameter 'language' could not be found")
            }

        val setKeyConfiguration = setExtractionConfig.getRegionKeyConfig()
        val setKeys = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
            .map { keyRoot ->
                extractTextFromElementByTargetFieldConfig(keyRoot, setKeyConfiguration)
                    ?: throw IllegalStateException("Parameter 'key' could not be found")
            }

        RegionalSet.flattenFromMemberLists(
            setIds,
            languages,
            setKeys
        )
    } catch (ex: NoSuchElementException) {
        // Same exception type as before, but with a usable message and the cause preserved.
        throw RuntimeException(
            "Regional sets could not be extracted for provider '$provider': " +
                "a required root config or element is missing",
            ex
        )
    }
}
|
||||
|
||||
/**
 * Runs the card field extractions for [provider] against [root].
 *
 * NOTE(review): this looks unfinished. All three extractions use the English name
 * config (and the same "Parameter 'name'" message), none of the values are used,
 * and the function returns null on every path. Confirm the intended configs for
 * card type and description before relying on it.
 *
 * @param root document to extract from
 * @param provider id of the source whose card config is used
 * @return always null
 * @throws IllegalArgumentException if no source exists for [provider]
 * @throws ElementNotFoundException if the configured root element cannot be located
 * @throws IllegalStateException if an extraction yields no value
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().getCardConfig().get()

    val rootConfigurationOptional = cardSelector.getRootConfig()
    if (!rootConfigurationOptional.isPresent) {
        // Without a shared root config nothing is extracted.
        return null
    }

    val rootElement: Element =
        getElementFromDocumentByExtractConfig(root, rootConfigurationOptional.get())
            ?: throw ElementNotFoundException("TODO make this better")

    // English card name.
    val englishCardName: String =
        extractTextFromElementByTargetFieldConfig(rootElement, cardSelector.getEnglishNameConfig())
            ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Card type (currently read through the English name config).
    val cardType: String =
        extractTextFromElementByTargetFieldConfig(rootElement, cardSelector.getEnglishNameConfig())
            ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Description (currently read through the English name config).
    val description: String =
        extractTextFromElementByTargetFieldConfig(rootElement, cardSelector.getEnglishNameConfig())
            ?: throw IllegalStateException("Parameter 'name' could not be found")

    return null
}
|
||||
|
||||
/**
 * Selects all elements below [document] that match the query of [step].
 *
 * The query string is evaluated as a CSS selector when the step's selector type is
 * [Selector.CSS], and as an XPath expression for every other selector type.
 *
 * @param document element the query is evaluated against
 * @param step extract config providing selector type and query string
 * @return the matching elements
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Elements = when (step.selectorType()) {
    Selector.CSS -> document.select(step.getQueryString())
    else -> document.selectXpath(step.getQueryString())
}
|
||||
|
||||
/**
 * Selects the first element below [document] that matches the query of [step].
 *
 * The query string is evaluated as a CSS selector when the step's selector type is
 * [Selector.CSS], and as an XPath expression for every other selector type.
 *
 * Returns null when nothing matches, as the nullable return type promises. The
 * previous implementation threw an [ElementNotFoundException] with an empty message
 * instead, which made the context-specific `?: throw ElementNotFoundException(...)`
 * at every call site unreachable.
 *
 * @param document element the query is evaluated against
 * @param step extract config providing selector type and query string
 * @return the first matching element, or null if there is none
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig,
): Element? {
    val matches = if (step.selectorType() == Selector.CSS) {
        document.select(step.getQueryString())
    } else {
        document.selectXpath(step.getQueryString())
    }
    return matches.firstOrNull()
}
|
||||
|
||||
/**
 * Walks the extraction steps of [extractionConfig] starting from a clone of [root]
 * and returns the extracted text, after applying the optional transformation steps.
 *
 * Every step except the last one moves to the next element via
 * [XPathUtil.getNextElement]; the last step produces the text via
 * [XPathUtil.extractResult]. Only the steps' query strings are used here.
 *
 * @param root element the first step is evaluated against (it is cloned, not modified here)
 * @param extractionConfig field config providing extraction and transformation steps
 * @return the (possibly transformed) text; null if a step lookup yields null
 * @throws IllegalStateException if an intermediate step produced no element
 * @throws ElementNotFoundException if no text was extracted (including when there are no steps)
 */
private fun extractTextFromElementByTargetFieldConfig(
    root: Element,
    extractionConfig: ScrapeTargetFieldConfig
): String? {
    val steps = extractionConfig.getExtractionSteps()
    val transformations = extractionConfig.getOptionalTransformationSteps()
    val finalIndex = steps.size - 1

    var cursor: Element? = root.clone()
    var extracted: String? = null

    for (position in 0..finalIndex) {
        val step = steps.elementAtOrNull(position) ?: return null
        // A previous navigation step must have yielded an element to continue from.
        val element = cursor ?: throw IllegalStateException()

        when (position) {
            finalIndex -> extracted = XPathUtil.extractResult(element, step.getQueryString())
            else -> cursor = XPathUtil.getNextElement(element, step.getQueryString())
        }
    }

    val text = extracted ?: throw ElementNotFoundException("Result could not be extracted")

    return if (transformations.isPresent) {
        transformationRegistry.applyTransformations(text, transformations.get())
    } else {
        text
    }
}
|
||||
|
||||
}
|
||||
@@ -1,8 +1,11 @@
|
||||
package com.rak.service
|
||||
|
||||
import com.rak.model.card.Card
|
||||
import com.rak.model.exception.NotImplementedException
|
||||
import com.rak.model.set.CardSet
|
||||
import com.rak.model.set.RegionalSet
|
||||
import com.rak.service.extract.RegionalSetExtractionService
|
||||
import com.rak.service.extract.SetExtractionService
|
||||
import jakarta.enterprise.context.ApplicationScoped
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
@@ -10,7 +13,6 @@ import org.jsoup.nodes.Document
|
||||
@ApplicationScoped
|
||||
class ScrapeService(
|
||||
private val sourceService: SourceService,
|
||||
private val extractionService: ExtractionService,
|
||||
private val setExtractionService: SetExtractionService,
|
||||
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||
) {
|
||||
@@ -25,7 +27,6 @@ class ScrapeService(
|
||||
val path: String = normalizePath(setName)
|
||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||
|
||||
// return extractionService.extractSet(setName, document, provider)
|
||||
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||
}
|
||||
|
||||
@@ -45,12 +46,8 @@ class ScrapeService(
|
||||
provider: String,
|
||||
cardName: String,
|
||||
): Card? {
|
||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||
throw NotImplementedException("Not implemented")
|
||||
|
||||
val path: String = normalizePath(cardName)
|
||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||
|
||||
return extractionService.extractCard(document, provider)
|
||||
}
|
||||
|
||||
private fun normalizePath(path: String): String = path
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
package com.rak.service
|
||||
package com.rak.service.extract
|
||||
|
||||
import com.rak.config.model.AbstractScrapeTargetConfig
|
||||
import com.rak.config.model.ExtractConfig
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
import com.rak.config.model.TransformationStepConfig
|
||||
import com.rak.model.Selector
|
||||
import com.rak.model.exception.ElementNotFoundException
|
||||
import com.rak.model.exception.InvalidConfigurationException
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.rak.service
|
||||
package com.rak.service.extract
|
||||
|
||||
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||
import com.rak.config.model.ProviderConfig
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.rak.service
|
||||
package com.rak.service.extract
|
||||
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.rak.service
|
||||
package com.rak.service.extract
|
||||
|
||||
import com.rak.config.model.ProviderConfig
|
||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||
Reference in New Issue
Block a user