Refactor packages
Remove ExtractionService
This commit is contained in:
@@ -1,273 +0,0 @@
|
|||||||
package com.rak.service
|
|
||||||
|
|
||||||
import com.rak.config.model.CardPrintScrapeTargetConfig
|
|
||||||
import com.rak.config.model.ExtractConfig
|
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
|
||||||
import com.rak.model.Selector
|
|
||||||
import com.rak.model.card.Card
|
|
||||||
import com.rak.model.card.CardPrint
|
|
||||||
import com.rak.model.exception.ElementNotFoundException
|
|
||||||
import com.rak.model.exception.InvalidConfigurationException
|
|
||||||
import com.rak.model.set.CardSet
|
|
||||||
import com.rak.model.set.RegionalSet
|
|
||||||
import com.rak.model.transform.TransformationRegistry
|
|
||||||
import com.rak.util.XPathUtil
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
|
||||||
import org.jsoup.nodes.Document
|
|
||||||
import org.jsoup.nodes.Element
|
|
||||||
import org.jsoup.select.Elements
|
|
||||||
import java.util.Optional
|
|
||||||
|
|
||||||
@ApplicationScoped
|
|
||||||
class ExtractionService(
|
|
||||||
private val sourceService: SourceService,
|
|
||||||
) {
|
|
||||||
|
|
||||||
private val transformationRegistry = TransformationRegistry()
|
|
||||||
|
|
||||||
/**
 * Builds a [CardSet] called [setName] whose regional sets are extracted from [root].
 *
 * @param setName name given to the resulting set
 * @param root element handed to [extractRegionalSets]
 * @param provider id of the source whose configuration drives the extraction
 * @return the assembled [CardSet]
 */
fun extractSet(setName: String, root: Element, provider: String): CardSet {
    val extractedRegionalSets = extractRegionalSets(root, provider)
    return CardSet(name = setName, regionalSets = extractedRegionalSets)
}
|
|
||||||
|
|
||||||
/**
 * Resolves the root element of [document] from the first available root configuration.
 *
 * [globalRootExtractConfig] takes precedence; [nodeRootExtractConfig] is consulted only when
 * the global one is absent.
 *
 * @param document document the root selector is evaluated against
 * @param globalRootExtractConfig root configuration shared by the whole target, if any
 * @param nodeRootExtractConfig root configuration of the individual field, if any
 * @return the element selected by the chosen configuration
 * @throws InvalidConfigurationException if neither configuration is present
 * @throws ElementNotFoundException if the chosen configuration selects no element
 */
fun getRootElement(
    document: Document,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    // orElseGet defers the fallback: with orElse the argument is evaluated eagerly, so a missing
    // node-level configuration would throw even though the global configuration is present.
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElseGet {
        nodeRootExtractConfig.orElseThrow {
            InvalidConfigurationException("Neither a global nor a field-level root configuration is present")
        }
    }

    return getElementFromDocumentByExtractConfig(document, rootExtractConfig)
        ?: throw ElementNotFoundException("No root could be found")
}
|
|
||||||
|
|
||||||
/**
 * Runs the card-name extraction for a card print and currently always returns `null`.
 *
 * The name is extracted relative to the root resolved by [getRootElement], but the extracted
 * value is not used yet; no [CardPrint] is built.
 *
 * @param document document to extract from
 * @param cardPrintConfig configuration supplying the root and name selectors
 * @return always `null`
 */
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
    val sharedRootConfig = cardPrintConfig.getRootConfig()
    val nameRootConfig = cardPrintConfig.getNameConfig().getRootConfig()
    val nameRoot = getRootElement(document, sharedRootConfig, nameRootConfig)

    // The extraction is still executed so that its failures surface; the result is discarded.
    extractTextFromElementByTargetFieldConfig(nameRoot, cardPrintConfig.getNameConfig())

    return null
}
|
|
||||||
|
|
||||||
/**
 * Extracts a single [RegionalSet] (id, language and region key) from [root].
 *
 * If the provider's set configuration declares a shared root configuration, all three fields
 * are read relative to [root]. Otherwise each field is read relative to the element selected
 * inside [root] by that field's own root configuration.
 *
 * @param root element the selectors are evaluated against
 * @param provider id of the source whose set configuration is used
 * @return a [RegionalSet] built from the three extracted values, an empty list and `-1`
 * @throws IllegalArgumentException if no source is known for [provider]
 * @throws NoSuchElementException if the set configuration, or a required field-level root
 *   configuration, is absent
 * @throws ElementNotFoundException if a selector matches nothing
 * @throws IllegalStateException if a field yields no text
 */
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
    val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()
    val fieldsShareRoot = setExtractionConfig.getRootConfig().isPresent

    val idConfig = setExtractionConfig.getIdConfig()
    val idRoot = if (fieldsShareRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, idConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'id' could not be found")
    }
    val setId: String = extractTextFromElementByTargetFieldConfig(idRoot, idConfig)
        ?: throw IllegalStateException("Parameter 'id' could not be found")

    // Language and region key use their own field configuration and root; previously the
    // per-field-root path re-used the id configuration for both, so all three values were the id.
    val languageConfig = setExtractionConfig.getLanguageConfig()
    val languageRoot = if (fieldsShareRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, languageConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'language' could not be found")
    }
    val setLanguage: String = extractTextFromElementByTargetFieldConfig(languageRoot, languageConfig)
        ?: throw IllegalStateException("Parameter 'language' could not be found")

    val regionKeyConfig = setExtractionConfig.getRegionKeyConfig()
    val regionKeyRoot = if (fieldsShareRoot) {
        root
    } else {
        getElementFromDocumentByExtractConfig(root, regionKeyConfig.getRootConfig().get())
            ?: throw ElementNotFoundException("Root element for parameter 'key' could not be found")
    }
    val setKey: String = extractTextFromElementByTargetFieldConfig(regionKeyRoot, regionKeyConfig)
        ?: throw IllegalStateException("Parameter 'key' could not be found")

    return RegionalSet(
        setId,
        setLanguage,
        setKey,
        listOf(),
        -1
    )
}
|
|
||||||
|
|
||||||
/**
 * Extracts every [RegionalSet] found below [root] for the given provider.
 *
 * If the set configuration declares a shared root configuration, each element it selects is
 * passed to [extractRegionalSet]. Otherwise ids, languages and region keys are collected as
 * three separate lists, each via the field's own root configuration, and combined by
 * [RegionalSet.flattenFromMemberLists].
 *
 * @param root element the selectors are evaluated against
 * @param provider id of the source whose set configuration is used
 * @return the extracted regional sets
 * @throws IllegalArgumentException if no source is known for [provider]
 * @throws IllegalStateException if a field yields no text
 * @throws RuntimeException if a [NoSuchElementException] occurs while collecting the
 *   per-field lists (for example a field without a root configuration)
 */
fun extractRegionalSets(root: Element, provider: String): Set<RegionalSet> {
    val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
    val setExtractionConfig = source.getTargets().getSetConfig().get()

    val sharedRootConfig = setExtractionConfig.getRootConfig()
    if (sharedRootConfig.isPresent) {
        return getElementsFromDocumentByExtractConfig(root, sharedRootConfig.get())
            .map { regionalSetRoot -> extractRegionalSet(regionalSetRoot, provider) }
            .toSet()
    }

    return try {
        val setIdConfiguration = setExtractionConfig.getIdConfig()
        val setIds = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get())
            .map { idRoot ->
                extractTextFromElementByTargetFieldConfig(idRoot, setIdConfiguration)
                    ?: throw IllegalStateException("Parameter 'id' could not be found")
            }

        val languageConfiguration = setExtractionConfig.getLanguageConfig()
        val languages = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get())
            .map { languageRoot ->
                // Message names the field actually being extracted (was 'id' by copy-paste).
                extractTextFromElementByTargetFieldConfig(languageRoot, languageConfiguration)
                    ?: throw IllegalStateException("Parameter 'language' could not be found")
            }

        val setKeyConfiguration = setExtractionConfig.getRegionKeyConfig()
        val setKeys = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get())
            .map { keyRoot ->
                // Message names the field actually being extracted (was 'id' by copy-paste).
                extractTextFromElementByTargetFieldConfig(keyRoot, setKeyConfiguration)
                    ?: throw IllegalStateException("Parameter 'key' could not be found")
            }

        RegionalSet.flattenFromMemberLists(
            setIds,
            languages,
            setKeys
        )
    } catch (ex: NoSuchElementException) {
        // Same exception type as before, but with a usable message and the original cause attached.
        throw RuntimeException(
            "Set extraction for provider '$provider' failed: a required configuration element is missing",
            ex
        )
    }
}
|
|
||||||
|
|
||||||
/**
 * Runs the card extraction for [provider] against [root]; currently always returns `null`.
 *
 * When the card configuration has a root configuration, the root element is resolved and three
 * text extractions are executed (their failures propagate), but no [Card] is built from them.
 * Without a root configuration nothing is extracted.
 *
 * NOTE(review): card type and description are read through `getEnglishNameConfig()`, exactly
 * like the name - this looks like a placeholder; confirm which configs they should use.
 *
 * @param root document to extract from
 * @param provider id of the source whose card configuration is used
 * @return always `null`
 * @throws IllegalArgumentException if no source is known for [provider]
 */
fun extractCard(root: Document, provider: String): Card? {
    val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
    val cardSelector = source.getTargets().getCardConfig().get()

    val optionalRootConfig = cardSelector.getRootConfig()
    if (!optionalRootConfig.isPresent) {
        return null
    }

    val cardRoot: Element = getElementFromDocumentByExtractConfig(root, optionalRootConfig.get())
        ?: throw ElementNotFoundException("TODO make this better")

    // English card name.
    extractTextFromElementByTargetFieldConfig(cardRoot, cardSelector.getEnglishNameConfig())
        ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Card type (read via the English name config, see note above).
    extractTextFromElementByTargetFieldConfig(cardRoot, cardSelector.getEnglishNameConfig())
        ?: throw IllegalStateException("Parameter 'name' could not be found")

    // Description (read via the English name config, see note above).
    extractTextFromElementByTargetFieldConfig(cardRoot, cardSelector.getEnglishNameConfig())
        ?: throw IllegalStateException("Parameter 'name' could not be found")

    return null
}
|
|
||||||
|
|
||||||
/**
 * Selects all elements below [document] that match the query of [step].
 *
 * The query is run as a CSS selector when the step's selector type is [Selector.CSS] and as an
 * XPath expression for every other selector type.
 *
 * @param document element the query is evaluated against
 * @param step configuration providing the selector type and the query string
 * @return the matching elements
 */
private fun getElementsFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig
): Elements {
    val isCssQuery = step.selectorType() == Selector.CSS
    val query = step.getQueryString()
    return if (isCssQuery) document.select(query) else document.selectXpath(query)
}
|
|
||||||
|
|
||||||
/**
 * Returns the first element below [document] that matches the query of [step].
 *
 * Selection is delegated to [getElementsFromDocumentByExtractConfig], so CSS and XPath queries
 * are dispatched in one place only.
 *
 * @param document element the query is evaluated against
 * @param step configuration providing the selector type and the query string
 * @return the first matching element; the nullable return type is kept for existing callers,
 *   but a missing match is reported by exception rather than by `null`
 * @throws ElementNotFoundException if the query matches nothing
 */
private fun getElementFromDocumentByExtractConfig(
    document: Element,
    step: ExtractConfig,
): Element? {
    return getElementsFromDocumentByExtractConfig(document, step).firstOrNull()
        // Name the failing query; the previous empty message gave no hint which selector missed.
        ?: throw ElementNotFoundException("No element matches query '${step.getQueryString()}'")
}
|
|
||||||
|
|
||||||
/**
 * Extracts the text of one field by walking the extraction steps of [extractionConfig].
 *
 * Starting from a clone of [root], every step except the last moves to the next element via
 * [XPathUtil.getNextElement]; the last step produces the text via [XPathUtil.extractResult].
 * If transformation steps are configured they are applied to the extracted text.
 *
 * @param root element the first step is evaluated against (cloned, not modified here)
 * @param extractionConfig field configuration supplying extraction and transformation steps
 * @return the extracted and optionally transformed text
 * @throws IllegalStateException if an intermediate step yields no element to continue from
 * @throws ElementNotFoundException if no text was extracted (including an empty step list)
 */
private fun extractTextFromElementByTargetFieldConfig(
    root: Element,
    extractionConfig: ScrapeTargetFieldConfig
): String? {
    val extractionSteps = extractionConfig.getExtractionSteps()
    val transformationSteps = extractionConfig.getOptionalTransformationSteps()
    val lastStepIndex = extractionSteps.size - 1

    var currentElement: Element? = root.clone()
    var extractedText: String? = null

    for ((index, currentStep) in extractionSteps.withIndex()) {
        val element = currentElement
            ?: throw IllegalStateException("Extraction step $index cannot run: the previous step yielded no element")

        if (index == lastStepIndex) {
            // Only the final step produces text; all earlier steps just narrow down the element.
            extractedText = XPathUtil.extractResult(element, currentStep.getQueryString())
        } else {
            currentElement = XPathUtil.getNextElement(element, currentStep.getQueryString())
        }
    }

    val rawText = extractedText ?: throw ElementNotFoundException("Result could not be extracted")

    return if (transformationSteps.isPresent) {
        transformationRegistry.applyTransformations(rawText, transformationSteps.get())
    } else {
        rawText
    }
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,8 +1,11 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.model.card.Card
|
import com.rak.model.card.Card
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
import com.rak.model.set.RegionalSet
|
import com.rak.model.set.RegionalSet
|
||||||
|
import com.rak.service.extract.RegionalSetExtractionService
|
||||||
|
import com.rak.service.extract.SetExtractionService
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
@@ -10,7 +13,6 @@ import org.jsoup.nodes.Document
|
|||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ScrapeService(
|
class ScrapeService(
|
||||||
private val sourceService: SourceService,
|
private val sourceService: SourceService,
|
||||||
private val extractionService: ExtractionService,
|
|
||||||
private val setExtractionService: SetExtractionService,
|
private val setExtractionService: SetExtractionService,
|
||||||
private val regionalSetExtractionService: RegionalSetExtractionService
|
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||||
) {
|
) {
|
||||||
@@ -25,7 +27,6 @@ class ScrapeService(
|
|||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
// return extractionService.extractSet(setName, document, provider)
|
|
||||||
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -45,12 +46,8 @@ class ScrapeService(
|
|||||||
provider: String,
|
provider: String,
|
||||||
cardName: String,
|
cardName: String,
|
||||||
): Card? {
|
): Card? {
|
||||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
throw NotImplementedException("Not implemented")
|
||||||
|
|
||||||
val path: String = normalizePath(cardName)
|
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
|
||||||
|
|
||||||
return extractionService.extractCard(document, provider)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun normalizePath(path: String): String = path
|
private fun normalizePath(path: String): String = path
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
package com.rak.service
|
package com.rak.service.extract
|
||||||
|
|
||||||
import com.rak.config.model.AbstractScrapeTargetConfig
|
import com.rak.config.model.AbstractScrapeTargetConfig
|
||||||
import com.rak.config.model.ExtractConfig
|
import com.rak.config.model.ExtractConfig
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
import com.rak.config.model.TransformationStepConfig
|
|
||||||
import com.rak.model.Selector
|
import com.rak.model.Selector
|
||||||
import com.rak.model.exception.ElementNotFoundException
|
import com.rak.model.exception.ElementNotFoundException
|
||||||
import com.rak.model.exception.InvalidConfigurationException
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.rak.service
|
package com.rak.service.extract
|
||||||
|
|
||||||
import com.rak.config.model.CardPrintScrapeTargetConfig
|
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.rak.service
|
package com.rak.service.extract
|
||||||
|
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.rak.service
|
package com.rak.service.extract
|
||||||
|
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
Reference in New Issue
Block a user