diff --git a/src/main/kotlin/com/rak/service/ExtractionService.kt b/src/main/kotlin/com/rak/service/ExtractionService.kt deleted file mode 100644 index 3f8e881..0000000 --- a/src/main/kotlin/com/rak/service/ExtractionService.kt +++ /dev/null @@ -1,273 +0,0 @@ -package com.rak.service - -import com.rak.config.model.CardPrintScrapeTargetConfig -import com.rak.config.model.ExtractConfig -import com.rak.config.model.ScrapeTargetFieldConfig -import com.rak.model.Selector -import com.rak.model.card.Card -import com.rak.model.card.CardPrint -import com.rak.model.exception.ElementNotFoundException -import com.rak.model.exception.InvalidConfigurationException -import com.rak.model.set.CardSet -import com.rak.model.set.RegionalSet -import com.rak.model.transform.TransformationRegistry -import com.rak.util.XPathUtil -import jakarta.enterprise.context.ApplicationScoped -import org.jsoup.nodes.Document -import org.jsoup.nodes.Element -import org.jsoup.select.Elements -import java.util.Optional - -@ApplicationScoped -class ExtractionService( - private val sourceService: SourceService, -) { - - private val transformationRegistry = TransformationRegistry() - - fun extractSet(setName: String, root: Element, provider: String): CardSet { - return CardSet( - name = setName, - regionalSets = extractRegionalSets(root, provider) - ) - } - - fun getRootElement( - document: Document, - globalRootExtractConfig: Optional, - nodeRootExtractConfig: Optional - ): Element { - val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElse( - nodeRootExtractConfig.orElseThrow { - InvalidConfigurationException("") - }) - - return getElementFromDocumentByExtractConfig(document, rootExtractConfig) ?: throw ElementNotFoundException("No root could be found") - } - - fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? { - val cardName = extractTextFromElementByTargetFieldConfig( - getRootElement( - document, - cardPrintConfig.getRootConfig(), - cardPrintConfig.getNameConfig().getRootConfig() - ), - cardPrintConfig.getNameConfig() - ) - - return null - } - - fun extractRegionalSet(root: Element, provider: String): RegionalSet { - val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") - val setExtractionConfig = source.getTargets().getSetConfig().get() - - if (setExtractionConfig.getRootConfig().isPresent) { - val setId: String = extractTextFromElementByTargetFieldConfig( - root, - setExtractionConfig.getIdConfig(), - - ) ?: throw IllegalStateException("Parameter 'id' could not be found") - val setLanguage: String = extractTextFromElementByTargetFieldConfig( - root, - setExtractionConfig.getLanguageConfig() - ) ?: throw IllegalStateException("Parameter 'language' could not be found") - val setKey: String = extractTextFromElementByTargetFieldConfig( - root, - setExtractionConfig.getRegionKeyConfig() - ) ?: throw IllegalStateException("Parameter 'key' could not be found") - - return RegionalSet( - setId, - setLanguage, - setKey, - listOf(), - -1 - ) - } else { - val setIdConfiguration = setExtractionConfig.getIdConfig() - val rootConfiguration = setIdConfiguration.getRootConfig().get() - - val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setId: String = extractTextFromElementByTargetFieldConfig( - setIdRoot, - setIdConfiguration - ) ?: throw IllegalStateException("Parameter 'id' could not be found") - - - val setLanguageConfiguration = setExtractionConfig.getIdConfig() - val setLanguageRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setLanguage: String = extractTextFromElementByTargetFieldConfig( - setLanguageRoot, - setLanguageConfiguration - ) ?: throw IllegalStateException("Parameter 'language' could not be found") - - - val setKeyConfiguration = setExtractionConfig.getIdConfig() - val setKeyRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this") - val setKey: String = extractTextFromElementByTargetFieldConfig( - setKeyRoot, - setKeyConfiguration - ) ?: throw IllegalStateException("Parameter 'key' could not be found") - - return RegionalSet( - setId, - setLanguage, - setKey, - listOf(), - -1 - ) - } - } - - fun extractRegionalSets(root: Element, provider: String): Set { - val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") - val setExtractionConfig = source.getTargets().getSetConfig().get() - - if (setExtractionConfig.getRootConfig().isPresent) { - val rootConfiguration = setExtractionConfig.getRootConfig().get() - val regionalSetRoots: Elements = getElementsFromDocumentByExtractConfig( - root, - rootConfiguration - ) - - return regionalSetRoots.map { - extractRegionalSet( - it, - provider - ) - }.toSet() - } else { - try { - val setIdConfiguration = setExtractionConfig.getIdConfig() - val setIdRoot = getElementsFromDocumentByExtractConfig(root, setIdConfiguration.getRootConfig().get()) - val setIds = setIdRoot.map { - extractTextFromElementByTargetFieldConfig( - it, - setIdConfiguration - ) ?: throw IllegalStateException("Parameter 'id' could not be found") - } - - val languageConfiguration = setExtractionConfig.getLanguageConfig() - val languageRoot = getElementsFromDocumentByExtractConfig(root, languageConfiguration.getRootConfig().get()) - val languages = languageRoot.map { - extractTextFromElementByTargetFieldConfig( - it, - languageConfiguration - ) ?: throw IllegalStateException("Parameter 'id' could not be found") - } - - val setKeyConfiguration = setExtractionConfig.getRegionKeyConfig() - val setKeyRoot = getElementsFromDocumentByExtractConfig(root, setKeyConfiguration.getRootConfig().get()) - val setKeys = setKeyRoot.map { - extractTextFromElementByTargetFieldConfig( - it, - setKeyConfiguration - ) ?: throw IllegalStateException("Parameter 'id' could not be found") - } - - return RegionalSet.flattenFromMemberLists( - setIds, - languages, - setKeys - ) - } catch (ex: NoSuchElementException) { - throw RuntimeException("sdfgs") // TODO handle me - } - } - } - - fun extractCard(root: Document, provider: String): Card? { - val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") - val cardSelector = source.getTargets().getCardConfig().get() - - val rootConfigurationOptional = cardSelector.getRootConfig() - - if (rootConfigurationOptional.isPresent) { - val rootConfiguration = rootConfigurationOptional.get() - val rootElement: Element = getElementFromDocumentByExtractConfig( - root, - rootConfiguration - ) ?: throw ElementNotFoundException("TODO make this better") - - val englishCardName: String = extractTextFromElementByTargetFieldConfig( - rootElement, - cardSelector.getEnglishNameConfig() - ) ?: throw IllegalStateException("Parameter 'name' could not be found") - - val cardType: String = extractTextFromElementByTargetFieldConfig( - rootElement, - cardSelector.getEnglishNameConfig() - ) ?: throw IllegalStateException("Parameter 'name' could not be found") - - val description: String = extractTextFromElementByTargetFieldConfig( - rootElement, - cardSelector.getEnglishNameConfig() - ) ?: throw IllegalStateException("Parameter 'name' could not be found") - - return null - } else { - return null - - } - - } - - private fun getElementsFromDocumentByExtractConfig( - document: Element, - step: ExtractConfig - ): Elements { - return if (step.selectorType() == Selector.CSS) { - document.select(step.getQueryString()) - } else { - document.selectXpath(step.getQueryString()) - } - } - - private fun getElementFromDocumentByExtractConfig( - document: Element, - step: ExtractConfig, - ): Element? { - return if (step.selectorType() == Selector.CSS) { - document.select(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("") - } else { - document.selectXpath(step.getQueryString()).firstOrNull() ?: throw ElementNotFoundException("") - } - } - - private fun extractTextFromElementByTargetFieldConfig( - root: Element, - extractionConfig: ScrapeTargetFieldConfig - ): String? { - val extractionSteps = extractionConfig.getExtractionSteps() - val transformationSteps = extractionConfig.getOptionalTransformationSteps() - - var currentElement: Element? = root.clone() - var result: String? = null - - for (index in 0 until extractionSteps.size) { - val currentStep = extractionSteps.elementAtOrNull(index) ?: return null - if (currentElement == null) { - throw IllegalStateException() - } - - if (index == extractionSteps.size - 1) { - result = XPathUtil.extractResult(currentElement, currentStep.getQueryString()) - } - else { - currentElement = XPathUtil.getNextElement(currentElement, currentStep.getQueryString()) - } - } - - if (result == null) { - throw ElementNotFoundException("Result could not be extracted") - } - - if (transformationSteps.isPresent) { - result = transformationRegistry.applyTransformations(result, transformationSteps.get()) - } - - return result - } - -} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ScrapeService.kt b/src/main/kotlin/com/rak/service/ScrapeService.kt index 403a7ea..268e903 100644 --- a/src/main/kotlin/com/rak/service/ScrapeService.kt +++ b/src/main/kotlin/com/rak/service/ScrapeService.kt @@ -1,8 +1,11 @@ package com.rak.service import com.rak.model.card.Card +import com.rak.model.exception.NotImplementedException import com.rak.model.set.CardSet import com.rak.model.set.RegionalSet +import com.rak.service.extract.RegionalSetExtractionService +import com.rak.service.extract.SetExtractionService import jakarta.enterprise.context.ApplicationScoped import org.jsoup.Jsoup import org.jsoup.nodes.Document @@ -10,7 +13,6 @@ import org.jsoup.nodes.Document @ApplicationScoped class ScrapeService( private val sourceService: SourceService, - private val extractionService: ExtractionService, private val setExtractionService: SetExtractionService, private val regionalSetExtractionService: RegionalSetExtractionService ) { @@ -25,7 +27,6 @@ class ScrapeService( val path: String = normalizePath(setName) val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() -// return extractionService.extractSet(setName, document, provider) return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get()) } @@ -45,12 +46,8 @@ class ScrapeService( provider: String, cardName: String, ): Card? { - val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") + throw NotImplementedException("Not implemented") - val path: String = normalizePath(cardName) - val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() - - return extractionService.extractCard(document, provider) } private fun normalizePath(path: String): String = path diff --git a/src/main/kotlin/com/rak/service/AbstractExtractionService.kt b/src/main/kotlin/com/rak/service/extract/AbstractExtractionService.kt similarity index 98% rename from src/main/kotlin/com/rak/service/AbstractExtractionService.kt rename to src/main/kotlin/com/rak/service/extract/AbstractExtractionService.kt index da92332..5c89763 100644 --- a/src/main/kotlin/com/rak/service/AbstractExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/AbstractExtractionService.kt @@ -1,10 +1,9 @@ -package com.rak.service +package com.rak.service.extract import com.rak.config.model.AbstractScrapeTargetConfig import com.rak.config.model.ExtractConfig import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig -import com.rak.config.model.TransformationStepConfig import com.rak.model.Selector import com.rak.model.exception.ElementNotFoundException import com.rak.model.exception.InvalidConfigurationException diff --git a/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt b/src/main/kotlin/com/rak/service/extract/CardPrintExtractionService.kt similarity index 98% rename from src/main/kotlin/com/rak/service/CardPrintExtractionService.kt rename to src/main/kotlin/com/rak/service/extract/CardPrintExtractionService.kt index 17a627d..4d66fa4 100644 --- a/src/main/kotlin/com/rak/service/CardPrintExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/CardPrintExtractionService.kt @@ -1,4 +1,4 @@ -package com.rak.service +package com.rak.service.extract import com.rak.config.model.CardPrintScrapeTargetConfig import com.rak.config.model.ProviderConfig diff --git a/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt b/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt similarity index 98% rename from src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt rename to src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt index 75a4a3a..1144f19 100644 --- a/src/main/kotlin/com/rak/service/RegionalSetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/RegionalSetExtractionService.kt @@ -1,4 +1,4 @@ -package com.rak.service +package com.rak.service.extract import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig diff --git a/src/main/kotlin/com/rak/service/SetExtractionService.kt b/src/main/kotlin/com/rak/service/extract/SetExtractionService.kt similarity index 98% rename from src/main/kotlin/com/rak/service/SetExtractionService.kt rename to src/main/kotlin/com/rak/service/extract/SetExtractionService.kt index f2a4e32..9aa9d28 100644 --- a/src/main/kotlin/com/rak/service/SetExtractionService.kt +++ b/src/main/kotlin/com/rak/service/extract/SetExtractionService.kt @@ -1,4 +1,4 @@ -package com.rak.service +package com.rak.service.extract import com.rak.config.model.ProviderConfig import com.rak.config.model.ScrapeTargetFieldConfig