Attempt to implement CardPrints
This commit is contained in:
@@ -0,0 +1,10 @@
|
|||||||
|
package com.rak.config.converter
|
||||||
|
|
||||||
|
import com.rak.model.DiscriminatorDirection
|
||||||
|
import org.eclipse.microprofile.config.spi.Converter
|
||||||
|
|
||||||
|
/**
 * Config converter that turns a raw configuration string into a
 * [DiscriminatorDirection] by delegating to [DiscriminatorDirection.from].
 */
class DiscriminatorDirectionConverter : Converter<DiscriminatorDirection> {

    /**
     * Converts [value] into the matching [DiscriminatorDirection].
     *
     * An unrecognised value is rejected with an exception instead of being mapped to `null`,
     * so a typo in the configuration is reported as an invalid value rather than silently
     * treated as an absent one.
     *
     * @param value raw configuration value
     * @return the direction whose string value equals [value]
     * @throws IllegalArgumentException when [value] matches no known direction
     */
    override fun convert(value: String): DiscriminatorDirection? {
        return DiscriminatorDirection.from(value)
            ?: throw IllegalArgumentException(
                "Unknown discriminator direction '$value'; expected one of: " +
                    DiscriminatorDirection.entries.joinToString { it.value }
            )
    }
}
|
||||||
@@ -1,9 +1,15 @@
|
|||||||
package com.rak.config.model
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithDefault
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
import java.util.Optional
|
import java.util.*
|
||||||
|
|
||||||
interface AbstractScrapeTargetConfig {
|
interface AbstractScrapeTargetConfig {
|
||||||
@WithName("root")
|
@WithName("root")
|
||||||
fun getRootConfig(): Optional<ExtractConfig>
|
fun getRootConfig(): Optional<ExtractConfig>
|
||||||
|
@WithName("multi")
|
||||||
|
@WithDefault("false")
|
||||||
|
fun isMulti(): Boolean
|
||||||
|
@WithName("discriminator")
|
||||||
|
fun getDiscriminator(): Optional<DiscriminatorConfig>
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
/** Marker supertype for field-level scrape target configurations; it declares no members of its own. */
interface AbstractScrapeTargetFieldConfig
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Scrape-target configuration for card prints.
 *
 * Extends [AbstractScrapeTargetConfig] with one [ScrapeTargetFieldConfig] per card print field;
 * each getter is bound to the configuration key given in its `@WithName` annotation.
 */
interface CardPrintScrapeTargetConfig : AbstractScrapeTargetConfig {

    /** Field configuration bound to the `id` key. */
    @WithName("id")
    fun getIdConfig(): ScrapeTargetFieldConfig

    /** Field configuration bound to the `name` key. */
    @WithName("name")
    fun getNameConfig(): ScrapeTargetFieldConfig

    /** Field configuration bound to the `regional-name` key. */
    @WithName("regional-name")
    fun getRegionNameConfig(): ScrapeTargetFieldConfig

    /** Field configuration bound to the `rarity` key. */
    @WithName("rarity")
    fun getRarityConfig(): ScrapeTargetFieldConfig
}
|
||||||
12
src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt
Normal file
12
src/main/kotlin/com/rak/config/model/DiscriminatorConfig.kt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import com.rak.config.converter.DiscriminatorDirectionConverter
|
||||||
|
import com.rak.model.DiscriminatorDirection
|
||||||
|
import io.smallrye.config.WithConverter
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Field configuration for a discriminator: a regular [ScrapeTargetFieldConfig]
 * that additionally carries a [DiscriminatorDirection].
 */
interface DiscriminatorConfig : ScrapeTargetFieldConfig {

    /**
     * Direction bound to the `direction` key; the raw string is converted by
     * [DiscriminatorDirectionConverter].
     */
    @WithName("direction")
    @WithConverter(DiscriminatorDirectionConverter::class)
    fun getDiscriminatorDirection(): DiscriminatorDirection
}
|
||||||
@@ -3,7 +3,7 @@ package com.rak.config.model
|
|||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
import java.util.*
|
import java.util.*
|
||||||
|
|
||||||
interface ScrapeTargetFieldConfig {
|
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
||||||
@WithName("root")
|
@WithName("root")
|
||||||
fun getRootConfig(): Optional<ExtractConfig>
|
fun getRootConfig(): Optional<ExtractConfig>
|
||||||
@WithName("steps")
|
@WithName("steps")
|
||||||
|
|||||||
@@ -8,4 +8,6 @@ interface TargetsConfig {
|
|||||||
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
||||||
@WithName("set")
|
@WithName("set")
|
||||||
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
||||||
|
@WithName("card-print")
|
||||||
|
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
||||||
}
|
}
|
||||||
@@ -2,6 +2,7 @@ package com.rak.controller
|
|||||||
|
|
||||||
import com.rak.model.card.Card
|
import com.rak.model.card.Card
|
||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
import com.rak.service.ScrapeService
|
import com.rak.service.ScrapeService
|
||||||
import jakarta.ws.rs.Consumes
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
@@ -35,6 +36,22 @@ class ScrapeController(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * GET `/{provider}/regionalSet`: delegates to [ScrapeService.scrapeRegionalSet] and returns its result.
 *
 * @param provider provider identifier taken from the request path
 * @param setName set name taken from the query string
 */
@GET
@Path("/{provider}/regionalSet")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
fun scrapeRegionalSet(
    @RestPath provider: String,
    @RestQuery setName: String
): RegionalSet = scrapeService.scrapeRegionalSet(provider, setName)
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("/{provider}/card")
|
@Path("/{provider}/card")
|
||||||
@Produces(MediaType.APPLICATION_JSON)
|
@Produces(MediaType.APPLICATION_JSON)
|
||||||
|
|||||||
17
src/main/kotlin/com/rak/model/DiscriminatorDirection.kt
Normal file
17
src/main/kotlin/com/rak/model/DiscriminatorDirection.kt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
package com.rak.model
|
||||||
|
|
||||||
|
/**
 * Direction setting of a discriminator. Each constant carries the string
 * [value] it is matched against in [from].
 */
enum class DiscriminatorDirection(val value: String) {
    ASC("asc"),
    DESC("desc");

    companion object {
        /**
         * Finds the constant whose [DiscriminatorDirection.value] equals [value] exactly
         * (the comparison is case-sensitive).
         *
         * @return the matching constant, or `null` when there is none
         */
        fun from(value: String): DiscriminatorDirection? =
            DiscriminatorDirection.entries.firstOrNull { candidate -> candidate.value == value }
    }
}
|
||||||
23
src/main/kotlin/com/rak/model/card/CardPrint.kt
Normal file
23
src/main/kotlin/com/rak/model/card/CardPrint.kt
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package com.rak.model.card
|
||||||
|
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
|
|
||||||
|
/**
 * A single print of a card.
 *
 * @property id identifier of the print
 * @property name name of the print
 * @property regionalName optional regional name; `null` when absent
 * @property rarity rarity of the print
 */
data class CardPrint(
    val id: String,
    val name: String,
    val regionalName: String? = null,
    val rarity: String
) {

    companion object {
        /**
         * Builds a [CardPrint] from a map of extracted field values.
         *
         * The keys `id`, `name` and `rarity` are mandatory; `regionalName` is optional.
         *
         * @throws IllegalStateException naming the missing key when a mandatory key is absent
         */
        fun fromMap(map: Map<String, String>): CardPrint {
            return CardPrint(
                id = map.required("id"),
                name = map.required("name"),
                regionalName = map["regionalName"],
                rarity = map.required("rarity"),
            )
        }

        /** Returns the value stored under [key] or fails with a message that names that key. */
        private fun Map<String, String>.required(key: String): String =
            this[key] ?: throw IllegalStateException("Parameter '$key' not found")
    }
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package com.rak.model.exception
|
||||||
|
|
||||||
|
/** Unchecked exception carrying a [message]; thrown by operations that have no implementation yet. */
class NotImplementedException(message: String) : RuntimeException(message)
|
||||||
@@ -5,4 +5,8 @@ import kotlin.collections.Set
|
|||||||
data class CardSet(
|
data class CardSet(
|
||||||
val name: String,
|
val name: String,
|
||||||
val regionalSets: Set<RegionalSet>
|
val regionalSets: Set<RegionalSet>
|
||||||
)
|
) {
|
||||||
|
companion object {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,13 +1,27 @@
|
|||||||
package com.rak.model.set
|
package com.rak.model.set
|
||||||
|
|
||||||
|
import com.rak.model.card.CardPrint
|
||||||
|
|
||||||
data class RegionalSet(
|
data class RegionalSet(
|
||||||
val prefix: String,
|
val prefix: String,
|
||||||
val region: String,
|
val region: String,
|
||||||
val regionCode: String
|
val regionCode: String,
|
||||||
|
val cardPrints: Collection<CardPrint>,
|
||||||
|
val numberOfCards: Int
|
||||||
) {
|
) {
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
|
|
||||||
|
fun fromMap(map: Map<String, String>, cardPrints: Collection<CardPrint>): RegionalSet {
|
||||||
|
return RegionalSet(
|
||||||
|
map["prefix"] ?: throw IllegalStateException("Parameter 'prefix' not found"),
|
||||||
|
map["region"] ?: throw IllegalStateException("Parameter 'region' not found"),
|
||||||
|
map["regionCode"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
|
||||||
|
cardPrints,
|
||||||
|
cardPrints.size
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
fun flattenFromMemberLists(
|
fun flattenFromMemberLists(
|
||||||
idList: List<String>,
|
idList: List<String>,
|
||||||
languageList: List<String>,
|
languageList: List<String>,
|
||||||
@@ -22,7 +36,9 @@ data class RegionalSet(
|
|||||||
regionalSetList.add(RegionalSet(
|
regionalSetList.add(RegionalSet(
|
||||||
prefix = idList[index],
|
prefix = idList[index],
|
||||||
region = languageList[index],
|
region = languageList[index],
|
||||||
regionCode = regionKeyAliasList[index]
|
regionCode = regionKeyAliasList[index],
|
||||||
|
listOf(),
|
||||||
|
numberOfCards = -1
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
return regionalSetList
|
return regionalSetList
|
||||||
|
|||||||
279
src/main/kotlin/com/rak/service/AbstractExtractionService.kt
Normal file
279
src/main/kotlin/com/rak/service/AbstractExtractionService.kt
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.model.AbstractScrapeTargetConfig
|
||||||
|
import com.rak.config.model.ExtractConfig
|
||||||
|
import com.rak.config.model.ProviderConfig
|
||||||
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
|
import com.rak.model.DiscriminatorDirection
|
||||||
|
import com.rak.model.Selector
|
||||||
|
import com.rak.model.exception.ElementNotFoundException
|
||||||
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
|
import com.rak.model.transform.TransformationRegistry
|
||||||
|
import com.rak.util.CssUtil
|
||||||
|
import com.rak.util.XPathUtil
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
import org.jsoup.select.Elements
|
||||||
|
import java.util.Optional
|
||||||
|
import kotlin.jvm.optionals.getOrElse
|
||||||
|
|
||||||
|
// find root element from global or node config
|
||||||
|
// get field target configs as list
|
||||||
|
// extract item from root element via field config
|
||||||
|
|
||||||
|
/**
 * Base class for services that read string fields out of a jsoup [Element] as described by a
 * scrape-target configuration.
 *
 * Subclasses declare which fields exist through [getItems] and turn the extracted
 * string maps into domain objects in [extract] / [extractMultiple].
 *
 * @param E type of the objects produced by the service
 * @param T scrape-target configuration type that drives the extraction
 */
abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {

    private val transformationRegistry = TransformationRegistry()

    /** Maps every output field identifier to the field configuration used to extract it. */
    abstract fun T.getItems(): Map<String, ScrapeTargetFieldConfig>

    /** Extracts a single object of type [E] from [element]. */
    abstract fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: T
    ): E

    /** Extracts all objects of type [E] found in [element]. */
    abstract fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: T
    ): Collection<E>

    /**
     * Returns the first element selected by the effective root configuration.
     *
     * @throws InvalidConfigurationException when neither root configuration is present
     * @throws ElementNotFoundException when the root selector matches nothing
     */
    fun getRootElement(
        element: Element,
        globalRootExtractConfig: Optional<ExtractConfig>,
        nodeRootExtractConfig: Optional<ExtractConfig>
    ): Element {
        return getRootElements(element, globalRootExtractConfig, nodeRootExtractConfig).firstOrNull()
            ?: throw ElementNotFoundException("No element matched the configured root selector")
    }

    /**
     * Returns every element selected by the effective root configuration.
     *
     * The global configuration wins when present; the node configuration is only a fallback.
     *
     * @throws InvalidConfigurationException when neither root configuration is present
     */
    fun getRootElements(
        element: Element,
        globalRootExtractConfig: Optional<ExtractConfig>,
        nodeRootExtractConfig: Optional<ExtractConfig>
    ): Elements {
        val rootExtractConfig: ExtractConfig = globalRootExtractConfig.getOrElse {
            nodeRootExtractConfig.orElseThrow {
                InvalidConfigurationException(
                    "No root configuration found: neither a global nor a field-level 'root' is set"
                )
            }
        }

        return getElementsFromElementByExtractConfig(element, rootExtractConfig)
    }

    /** Returns the first element selected by [step] inside [element], or `null` when nothing matches. */
    protected fun getElementFromDocumentByExtractConfig(
        element: Element,
        step: ExtractConfig,
    ): Element? {
        return getElementsFromElementByExtractConfig(element, step).firstOrNull()
    }

    /** Runs the query of [step] against [element] as a CSS selector or as an XPath expression. */
    protected fun getElementsFromElementByExtractConfig(
        element: Element,
        step: ExtractConfig,
    ): Elements {
        return if (step.selectorType() == Selector.CSS) {
            element.select(step.getQueryString())
        } else {
            element.selectXpath(step.getQueryString())
        }
    }

    /**
     * Extracts one value per configured field from [document].
     *
     * @return map from field identifier to extracted text, in the iteration order of [getItems]
     * @throws ElementNotFoundException when a field cannot be extracted
     */
    protected fun extractAsMap(
        document: Element,
        extractionConfig: T
    ): Map<String, String> {
        return extractionConfig.getItems().mapValues { (identifier, fieldConfig) ->
            val rootElement = getRootElement(
                document,
                extractionConfig.getRootConfig(),
                fieldConfig.getRootConfig()
            )

            extractTextFromElementByTargetFieldConfig(rootElement, fieldConfig)
                ?: throw ElementNotFoundException("Could not find element for '$identifier'")
        }
    }

    /**
     * Extracts one map per root element found in [element].
     *
     * The root elements are resolved separately for every field; the value extracted from the
     * n-th root of a field goes into the n-th result map.
     *
     * @throws ElementNotFoundException when a field cannot be extracted from one of its roots
     */
    fun extractAsListOfMaps(
        element: Element,
        extractionConfig: T
    ): List<Map<String, String>> {
        return collectRows(extractionConfig.getItems()) { fieldConfig ->
            getRootElements(
                element,
                extractionConfig.getRootConfig(),
                fieldConfig.getRootConfig()
            )
        }
    }

    /**
     * Extracts one map per entry of [elements]; every field is read from each given element.
     *
     * @throws ElementNotFoundException when a field cannot be extracted from one of the elements
     */
    fun extractAsListOfMaps(
        elements: Elements,
        extractionConfig: T
    ): List<Map<String, String>> {
        return collectRows(extractionConfig.getItems()) { elements }
    }

    /**
     * Extracts the configured fields for the root elements that correspond to a discriminator value.
     *
     * The discriminator values are read from the elements selected by the discriminator's root
     * configuration inside the first root element. When there are more root elements than
     * discriminator values, only as many root elements as there are values are kept: the leading
     * ones for [DiscriminatorDirection.DESC], the trailing ones otherwise.
     *
     * @return a list holding a single entry: the maps extracted from the kept root elements
     * @throws InvalidConfigurationException when the root or discriminator configuration is missing
     * @throws ElementNotFoundException when the root or a discriminator value cannot be found
     */
    fun extractWithDiscriminator(
        element: Element,
        extractionConfig: T
    ): List<List<Map<String, String>>> {
        val rootElements = getRootElements(
            element,
            extractionConfig.getRootConfig(),
            Optional.empty<ExtractConfig>()
        )
        val rootElement = rootElements.firstOrNull()
            ?: throw ElementNotFoundException("No element matched the configured root selector")

        val discriminatorConfig = extractionConfig.getDiscriminator().orElseThrow {
            InvalidConfigurationException("Discriminator extraction requires a 'discriminator' configuration")
        }
        val discriminatorRootConfig = discriminatorConfig.getRootConfig().orElseThrow {
            InvalidConfigurationException("The 'discriminator' configuration requires a 'root' selector")
        }

        val discriminatedElements = getElementsFromElementByExtractConfig(rootElement, discriminatorRootConfig)

        // Every discriminator value is extracted so that a broken one fails fast;
        // afterwards only the number of values is used.
        val discriminations = discriminatedElements.map { discriminatorElement ->
            extractTextFromElementByTargetFieldConfig(discriminatorElement, discriminatorConfig)
                ?: throw ElementNotFoundException("Could not extract a discriminator value")
        }

        val definitiveElements = if (discriminations.size < rootElements.size) {
            val kept = if (discriminatorConfig.getDiscriminatorDirection() == DiscriminatorDirection.DESC) {
                rootElements.take(discriminations.size)
            } else {
                rootElements.takeLast(discriminations.size)
            }
            Elements(kept)
        } else {
            rootElements
        }

        return listOf(extractAsListOfMaps(definitiveElements, extractionConfig))
    }

    /**
     * Shared implementation of both [extractAsListOfMaps] overloads: for every field, extracts
     * its value from each element supplied by [rootsForField] and stores it in the result map
     * at the same position, creating that map on first use.
     */
    private fun collectRows(
        fieldConfigs: Map<String, ScrapeTargetFieldConfig>,
        rootsForField: (ScrapeTargetFieldConfig) -> List<Element>
    ): List<Map<String, String>> {
        val rows = mutableListOf<MutableMap<String, String>>()

        for ((identifier, fieldConfig) in fieldConfigs) {
            rootsForField(fieldConfig).forEachIndexed { index, rootElement ->
                val extractedText = extractTextFromElementByTargetFieldConfig(rootElement, fieldConfig)
                    ?: throw ElementNotFoundException("Could not find element for '$identifier'")

                // Indexes are visited in ascending order, so a missing row is always the next one.
                val row = rows.getOrNull(index)
                    ?: mutableMapOf<String, String>().also { rows.add(it) }
                row[identifier] = extractedText
            }
        }

        return rows
    }

    /**
     * Walks the extraction steps of [extractionConfig] starting at a clone of [root]: every step
     * but the last narrows the current element, the last step yields the text. Configured
     * transformation steps are applied to the text afterwards.
     *
     * @throws IllegalStateException when an intermediate step matched no element
     * @throws ElementNotFoundException when no text could be extracted
     */
    private fun extractTextFromElementByTargetFieldConfig(
        root: Element,
        extractionConfig: ScrapeTargetFieldConfig
    ): String? {
        val extractionSteps = extractionConfig.getExtractionSteps()
        val transformationSteps = extractionConfig.getOptionalTransformationSteps()

        var currentElement: Element? = root.clone()
        var result: String? = null
        val lastIndex = extractionSteps.size - 1

        for (index in 0 until extractionSteps.size) {
            val currentStep = extractionSteps.elementAtOrNull(index) ?: return null
            val scope = checkNotNull(currentElement) {
                "Extraction step $index has no element to work on: the previous step matched nothing"
            }

            if (index == lastIndex) {
                result = when (currentStep.selectorType()) {
                    Selector.CSS -> CssUtil.extractResult(scope, currentStep.getQueryString())
                    Selector.XPATH -> XPathUtil.extractResult(scope, currentStep.getQueryString())
                }
            } else {
                currentElement = when (currentStep.selectorType()) {
                    Selector.CSS -> CssUtil.getNextElement(scope, currentStep.getQueryString())
                    Selector.XPATH -> XPathUtil.getNextElement(scope, currentStep.getQueryString())
                }
            }
        }

        if (result == null) {
            throw ElementNotFoundException("Result could not be extracted")
        }

        if (transformationSteps.isPresent) {
            result = transformationRegistry.applyTransformations(result, transformationSteps.get())
        }

        return result
    }
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||||
|
import com.rak.config.model.ProviderConfig
|
||||||
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
|
import com.rak.config.model.SetScrapeTargetConfig
|
||||||
|
import com.rak.model.card.CardPrint
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
|
import com.rak.model.set.CardSet
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
import org.jsoup.nodes.Document
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
|
/**
 * Extraction service producing [CardPrint] objects from a [CardPrintScrapeTargetConfig].
 */
@ApplicationScoped
class CardPrintExtractionService : AbstractExtractionService<CardPrint, CardPrintScrapeTargetConfig>() {

    /** Field identifiers (the keys read by [CardPrint.fromMap]) mapped to their field configurations. */
    override fun CardPrintScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            "id" to getIdConfig(),
            "name" to getNameConfig(),
            "regionalName" to getRegionNameConfig(),
            "rarity" to getRarityConfig(),
        )
    }

    /**
     * Single-item extraction is not supported.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): CardPrint {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts every card print found in [element] via [extractWithDiscriminator].
     *
     * All maps of all groups are converted, so no extracted print is dropped and an
     * empty group simply contributes nothing.
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: CardPrintScrapeTargetConfig
    ): Collection<CardPrint> {
        return extractWithDiscriminator(element, extractionConfig)
            .flatten()
            .map { fields -> CardPrint.fromMap(fields) }
    }
}
|
||||||
@@ -1,10 +1,13 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.model.CardPrintScrapeTargetConfig
|
||||||
import com.rak.config.model.ExtractConfig
|
import com.rak.config.model.ExtractConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
import com.rak.model.Selector
|
import com.rak.model.Selector
|
||||||
import com.rak.model.card.Card
|
import com.rak.model.card.Card
|
||||||
|
import com.rak.model.card.CardPrint
|
||||||
import com.rak.model.exception.ElementNotFoundException
|
import com.rak.model.exception.ElementNotFoundException
|
||||||
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
import com.rak.model.set.RegionalSet
|
import com.rak.model.set.RegionalSet
|
||||||
import com.rak.model.transform.TransformationRegistry
|
import com.rak.model.transform.TransformationRegistry
|
||||||
@@ -13,6 +16,7 @@ import jakarta.enterprise.context.ApplicationScoped
|
|||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
import org.jsoup.select.Elements
|
import org.jsoup.select.Elements
|
||||||
|
import java.util.Optional
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ExtractionService(
|
class ExtractionService(
|
||||||
@@ -28,6 +32,32 @@ class ExtractionService(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the root element for an extraction inside [document].
 *
 * The global root configuration is used when present; the node-level configuration is only
 * consulted as a fallback, and evaluated lazily, so a missing node configuration no longer
 * causes a failure when a global one exists.
 *
 * @throws InvalidConfigurationException when neither root configuration is present
 * @throws ElementNotFoundException when the root selector matches nothing
 */
fun getRootElement(
    document: Document,
    globalRootExtractConfig: Optional<ExtractConfig>,
    nodeRootExtractConfig: Optional<ExtractConfig>
): Element {
    val rootExtractConfig: ExtractConfig = globalRootExtractConfig.orElseGet {
        nodeRootExtractConfig.orElseThrow {
            InvalidConfigurationException(
                "No root configuration found: neither a global nor a field-level 'root' is set"
            )
        }
    }

    return getElementFromDocumentByExtractConfig(document, rootExtractConfig)
        ?: throw ElementNotFoundException("No root could be found")
}
|
||||||
|
|
||||||
|
/**
 * Unfinished card print extraction: resolves the root element for the name field and runs the
 * name extraction on it, but discards the extracted text.
 *
 * @return always `null`
 */
fun extractCardPrint(document: Document, cardPrintConfig: CardPrintScrapeTargetConfig): CardPrint? {
    val nameRoot = getRootElement(
        document,
        cardPrintConfig.getRootConfig(),
        cardPrintConfig.getNameConfig().getRootConfig()
    )

    // The extracted name is not used yet; the call is kept so that failures still surface.
    extractTextFromElementByTargetFieldConfig(nameRoot, cardPrintConfig.getNameConfig())

    return null
}
|
||||||
|
|
||||||
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
|
fun extractRegionalSet(root: Element, provider: String): RegionalSet {
|
||||||
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
val setExtractionConfig = source.getTargets().getSetConfig().get()
|
val setExtractionConfig = source.getTargets().getSetConfig().get()
|
||||||
@@ -50,13 +80,12 @@ class ExtractionService(
|
|||||||
return RegionalSet(
|
return RegionalSet(
|
||||||
setId,
|
setId,
|
||||||
setLanguage,
|
setLanguage,
|
||||||
setKey
|
setKey,
|
||||||
|
listOf(),
|
||||||
|
-1
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
val setIdConfiguration = setExtractionConfig.getIdConfig()
|
val setIdConfiguration = setExtractionConfig.getIdConfig()
|
||||||
if (!setIdConfiguration.getRootConfig().isPresent) {
|
|
||||||
throw RuntimeException("as[po") // TODO fix me
|
|
||||||
}
|
|
||||||
val rootConfiguration = setIdConfiguration.getRootConfig().get()
|
val rootConfiguration = setIdConfiguration.getRootConfig().get()
|
||||||
|
|
||||||
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
|
val setIdRoot = getElementFromDocumentByExtractConfig(root, rootConfiguration) ?: throw ElementNotFoundException("TODO fix this")
|
||||||
@@ -84,7 +113,9 @@ class ExtractionService(
|
|||||||
return RegionalSet(
|
return RegionalSet(
|
||||||
setId,
|
setId,
|
||||||
setLanguage,
|
setLanguage,
|
||||||
setKey
|
setKey,
|
||||||
|
listOf(),
|
||||||
|
-1
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.model.ProviderConfig
|
||||||
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
|
import com.rak.config.model.SetScrapeTargetConfig
|
||||||
|
import com.rak.config.model.SourcesConfig
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
|
/**
 * Extraction service producing [RegionalSet] objects from a [SetScrapeTargetConfig];
 * card prints are obtained from [CardPrintExtractionService].
 */
@ApplicationScoped
class RegionalSetExtractionService(
    private val cardPrintExtractionService: CardPrintExtractionService,
    private val sourcesConfig: SourcesConfig
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() {

    /** Field identifiers (the keys read by [RegionalSet.fromMap]) mapped to their field configurations. */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
        return mapOf(
            "prefix" to getIdConfig(),
            "regionCode" to getRegionKeyConfig(),
            "region" to getLanguageConfig(),
        )
    }

    /**
     * Single-item extraction is not supported.
     *
     * @throws NotImplementedException always
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): RegionalSet {
        throw NotImplementedException("Not implemented")
    }

    /**
     * Extracts every regional set found in [element].
     *
     * The card prints are extracted once with the provider's card-print configuration, and the
     * same collection is handed to every regional set that is built.
     *
     * @throws IllegalStateException when the provider has no card-print configuration
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<RegionalSet> {
        val regionalSetList = extractAsListOfMaps(element, extractionConfig)

        val cardPrintConfig = providerConfig.getTargets().getCardPrintConfiguration().orElseThrow {
            IllegalStateException("No 'card-print' target is configured for this provider")
        }
        val cardPrints = cardPrintExtractionService.extractMultiple(
            element,
            providerConfig,
            cardPrintConfig
        )

        return regionalSetList.map { fields ->
            RegionalSet.fromMap(fields, cardPrints)
        }
    }
}
|
||||||
@@ -2,6 +2,7 @@ package com.rak.service
|
|||||||
|
|
||||||
import com.rak.model.card.Card
|
import com.rak.model.card.Card
|
||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
|
import com.rak.model.set.RegionalSet
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
@@ -10,6 +11,8 @@ import org.jsoup.nodes.Document
|
|||||||
class ScrapeService(
|
class ScrapeService(
|
||||||
private val sourceService: SourceService,
|
private val sourceService: SourceService,
|
||||||
private val extractionService: ExtractionService,
|
private val extractionService: ExtractionService,
|
||||||
|
private val setExtractionService: SetExtractionService,
|
||||||
|
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
|
||||||
@@ -22,7 +25,20 @@ class ScrapeService(
|
|||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
return extractionService.extractSet(setName, document, provider)
|
// return extractionService.extractSet(setName, document, provider)
|
||||||
|
return setExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||||
|
}
|
||||||
|
|
||||||
|
fun scrapeRegionalSet(
|
||||||
|
provider: String,
|
||||||
|
setName: String,
|
||||||
|
): RegionalSet {
|
||||||
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
|
|
||||||
|
val path: String = normalizePath(setName)
|
||||||
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
|
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeCard(
|
fun scrapeCard(
|
||||||
|
|||||||
42
src/main/kotlin/com/rak/service/SetExtractionService.kt
Normal file
42
src/main/kotlin/com/rak/service/SetExtractionService.kt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.model.ProviderConfig
|
||||||
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
|
import com.rak.config.model.SetScrapeTargetConfig
|
||||||
|
import com.rak.model.exception.NotImplementedException
|
||||||
|
import com.rak.model.set.CardSet
|
||||||
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
|
/**
 * Extraction service producing [CardSet] objects; the regional sets of a card set are
 * obtained from [RegionalSetExtractionService].
 */
@ApplicationScoped
class SetExtractionService(
    private val regionalSetExtractionService: RegionalSetExtractionService
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {

    /** Field identifiers mapped to the field configurations they are extracted with. */
    override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> = mapOf(
        "prefix" to getIdConfig(),
        "regionCode" to getRegionKeyConfig(),
        "region" to getLanguageConfig(),
    )

    /**
     * Builds a [CardSet] holding the distinct regional sets extracted from [element].
     * The set name is currently the fixed placeholder `"test"`.
     */
    override fun extract(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): CardSet {
        val regionalSets = regionalSetExtractionService
            .extractMultiple(element, providerConfig, extractionConfig)
            .toSet()

        return CardSet("test", regionalSets)
    }

    /**
     * Multi-item extraction is not supported.
     *
     * @throws NotImplementedException always
     */
    override fun extractMultiple(
        element: Element,
        providerConfig: ProviderConfig,
        extractionConfig: SetScrapeTargetConfig
    ): Collection<CardSet> {
        throw NotImplementedException("Not implemented")
    }
}
|
||||||
19
src/main/kotlin/com/rak/util/CssUtil.kt
Normal file
19
src/main/kotlin/com/rak/util/CssUtil.kt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package com.rak.util
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
|
/**
 * CSS-selector helpers for jsoup elements. The class cannot be instantiated;
 * all helpers live on the companion object.
 */
class CssUtil private constructor() {

    companion object {
        /** Returns the first element matched by the CSS selector [path] inside [element], or `null`. */
        fun getNextElement(element: Element, path: String): Element? {
            val matches = element.select(path)
            return matches.firstOrNull()
        }

        /** Returns the text of the first element matched by the CSS selector [path] inside [root], or `null`. */
        fun extractResult(root: Element, path: String): String? {
            val firstMatch = root.select(path).firstOrNull()
            return firstMatch?.text()
        }
    }
}
|
||||||
@@ -1 +1,2 @@
|
|||||||
com.rak.config.converter.TypeSelectorConverter
|
com.rak.config.converter.TypeSelectorConverter
|
||||||
|
com.rak.config.converter.DiscriminatorDirectionConverter
|
||||||
|
|||||||
@@ -54,6 +54,35 @@ scraper:
|
|||||||
steps:
|
steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/abbr/text()"
|
value: "//li/abbr/text()"
|
||||||
|
card-print:
|
||||||
|
multi: true
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".tabber.wds-tabber > div"
|
||||||
|
discriminator:
|
||||||
|
direction: asc
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: ".wds-tabs__tab"
|
||||||
|
steps:
|
||||||
|
- type: xpath
|
||||||
|
value: "//li/div/a/text()"
|
||||||
|
id:
|
||||||
|
steps:
|
||||||
|
- type: xpath
|
||||||
|
value: ".//table/tbody/tr[2]/td[1]/a/text()"
|
||||||
|
name:
|
||||||
|
steps:
|
||||||
|
- type: xpath
|
||||||
|
value: ".//table/tbody/tr[2]/td[1]/a/text()"
|
||||||
|
regional-name:
|
||||||
|
steps:
|
||||||
|
- type: xpath
|
||||||
|
value: ".//table/tbody/tr[2]/td[2]/a/text()"
|
||||||
|
rarity:
|
||||||
|
steps:
|
||||||
|
- type: xpath
|
||||||
|
value: ".//table/tbody/tr[2]/td[3]/a/text()"
|
||||||
card:
|
card:
|
||||||
name:
|
name:
|
||||||
root:
|
root:
|
||||||
|
|||||||
Reference in New Issue
Block a user