Split Set/RegionalSet properly
This commit is contained in:
@@ -0,0 +1,12 @@
|
|||||||
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
|
/**
 * Scrape-target configuration for a regional set.
 *
 * Each accessor is bound to a configuration key through SmallRye's [WithName]
 * and returns the [ScrapeTargetFieldConfig] for that key.
 */
interface RegionalSetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||||
|
/** Field configuration read from the `id` key. */
@WithName("id")
|
||||||
|
fun getIdConfig(): ScrapeTargetFieldConfig
|
||||||
|
/** Field configuration read from the `language` key. */
@WithName("language")
|
||||||
|
fun getLanguageConfig(): ScrapeTargetFieldConfig
|
||||||
|
/** Field configuration read from the `region-key` key. */
@WithName("region-key")
|
||||||
|
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
|
||||||
|
}
|
||||||
@@ -3,10 +3,6 @@ package com.rak.config.model
|
|||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
interface SetScrapeTargetConfig : AbstractScrapeTargetConfig {
|
||||||
@WithName("id")
|
@WithName("name")
|
||||||
fun getIdConfig(): ScrapeTargetFieldConfig
|
fun getNameConfig(): ScrapeTargetFieldConfig
|
||||||
@WithName("language")
|
|
||||||
fun getLanguageConfig(): ScrapeTargetFieldConfig
|
|
||||||
@WithName("region-key")
|
|
||||||
fun getRegionKeyConfig(): ScrapeTargetFieldConfig
|
|
||||||
}
|
}
|
||||||
@@ -8,6 +8,8 @@ interface TargetsConfig {
|
|||||||
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
fun getCardConfig(): Optional<CardScrapeTargetConfig>
|
||||||
@WithName("set")
|
@WithName("set")
|
||||||
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
fun getSetConfig(): Optional<SetScrapeTargetConfig>
|
||||||
|
@WithName("regional-set")
|
||||||
|
fun getRegionalSetConfig(): Optional<RegionalSetScrapeTargetConfig>
|
||||||
@WithName("card-print")
|
@WithName("card-print")
|
||||||
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
fun getCardPrintConfiguration(): Optional<CardPrintScrapeTargetConfig>
|
||||||
}
|
}
|
||||||
@@ -1,12 +1,15 @@
|
|||||||
package com.rak.model.set
|
package com.rak.model.set
|
||||||
|
|
||||||
import kotlin.collections.Set
|
|
||||||
|
|
||||||
data class CardSet(
|
data class CardSet(
|
||||||
var name: String,
|
var name: String,
|
||||||
val regionalSets: Set<RegionalSet>
|
val regionalSets: Set<RegionalSet>
|
||||||
) {
|
) {
|
||||||
companion object {
|
companion object {
|
||||||
|
fun fromMap(map: Map<String, String>, regionalSet: Set<RegionalSet>): CardSet {
|
||||||
|
return CardSet(
|
||||||
|
map["name"] ?: throw IllegalStateException("Parameter 'name' not found"),
|
||||||
|
regionalSet
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -7,7 +7,7 @@ import com.rak.model.exception.TargetNotFoundException
|
|||||||
import com.rak.model.set.CardSet
|
import com.rak.model.set.CardSet
|
||||||
import com.rak.model.set.RegionalSet
|
import com.rak.model.set.RegionalSet
|
||||||
import com.rak.service.extract.RegionalSetExtractionService
|
import com.rak.service.extract.RegionalSetExtractionService
|
||||||
import com.rak.service.extract.SetExtractionService
|
import com.rak.service.extract.CardSetExtractionService
|
||||||
import io.quarkus.logging.Log
|
import io.quarkus.logging.Log
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
@@ -17,7 +17,7 @@ import java.lang.Exception
|
|||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ScrapeService(
|
class ScrapeService(
|
||||||
private val sourceService: SourceService,
|
private val sourceService: SourceService,
|
||||||
private val setExtractionService: SetExtractionService,
|
private val cardSetExtractionService: CardSetExtractionService,
|
||||||
private val regionalSetExtractionService: RegionalSetExtractionService,
|
private val regionalSetExtractionService: RegionalSetExtractionService,
|
||||||
private val commonCrawlService: CommonCrawlService
|
private val commonCrawlService: CommonCrawlService
|
||||||
) {
|
) {
|
||||||
@@ -59,13 +59,11 @@ class ScrapeService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return setExtractionService.extract(
|
return cardSetExtractionService.extract(
|
||||||
document,
|
document,
|
||||||
source,
|
source,
|
||||||
source.getTargets().getSetConfig().get()
|
source.getTargets().getSetConfig().get()
|
||||||
).apply {
|
)
|
||||||
name = setName
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeRegionalSet(
|
fun scrapeRegionalSet(
|
||||||
@@ -77,7 +75,7 @@ class ScrapeService(
|
|||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
|
|
||||||
return regionalSetExtractionService.extract(document, source, source.getTargets().getSetConfig().get())
|
return regionalSetExtractionService.extract(document, source, source.getTargets().getRegionalSetConfig().get())
|
||||||
}
|
}
|
||||||
|
|
||||||
fun scrapeCard(
|
fun scrapeCard(
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.config.model.CardScrapeTargetConfig
|
import com.rak.config.model.CardScrapeTargetConfig
|
||||||
import com.rak.config.model.SetScrapeTargetConfig
|
import com.rak.config.model.RegionalSetScrapeTargetConfig
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.SourcesConfig
|
import com.rak.config.model.SourcesConfig
|
||||||
import com.rak.model.exception.InvalidConfigurationException
|
import com.rak.model.exception.InvalidConfigurationException
|
||||||
@@ -21,7 +21,7 @@ class SourceService(
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun validateSource(providerConfig: ProviderConfig) {
|
private fun validateSource(providerConfig: ProviderConfig) {
|
||||||
val optionalRegionalSetConfig = providerConfig.getTargets().getSetConfig()
|
val optionalRegionalSetConfig = providerConfig.getTargets().getRegionalSetConfig()
|
||||||
val optionalCardConfig = providerConfig.getTargets().getCardConfig()
|
val optionalCardConfig = providerConfig.getTargets().getCardConfig()
|
||||||
|
|
||||||
if (optionalRegionalSetConfig.isPresent) {
|
if (optionalRegionalSetConfig.isPresent) {
|
||||||
@@ -33,7 +33,7 @@ class SourceService(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun validateSetExtractConfig(setExtractConfig: SetScrapeTargetConfig) {
|
private fun validateSetExtractConfig(setExtractConfig: RegionalSetScrapeTargetConfig) {
|
||||||
val selectors = listOf(
|
val selectors = listOf(
|
||||||
setExtractConfig.getLanguageConfig(),
|
setExtractConfig.getLanguageConfig(),
|
||||||
setExtractConfig.getIdConfig(),
|
setExtractConfig.getIdConfig(),
|
||||||
|
|||||||
@@ -9,15 +9,13 @@ import jakarta.enterprise.context.ApplicationScoped
|
|||||||
import org.jsoup.nodes.Element
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class SetExtractionService(
|
class CardSetExtractionService(
|
||||||
private val regionalSetExtractionService: RegionalSetExtractionService
|
private val regionalSetExtractionService: RegionalSetExtractionService
|
||||||
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
|
) : AbstractExtractionService<CardSet, SetScrapeTargetConfig>() {
|
||||||
|
|
||||||
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
||||||
return mapOf(
|
return mapOf(
|
||||||
Pair("prefix", this.getIdConfig()),
|
Pair("name", this.getNameConfig()),
|
||||||
Pair("regionCode", this.getRegionKeyConfig()),
|
|
||||||
Pair("region", this.getLanguageConfig()),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,9 +24,15 @@ class SetExtractionService(
|
|||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: SetScrapeTargetConfig
|
||||||
): CardSet {
|
): CardSet {
|
||||||
return CardSet(
|
val set = extractSingle(element, extractionConfig)
|
||||||
"test",
|
|
||||||
regionalSetExtractionService.extractMultiple(element, providerConfig, extractionConfig).toSet()
|
return CardSet.fromMap(
|
||||||
|
set,
|
||||||
|
regionalSetExtractionService.extractMultiple(
|
||||||
|
element,
|
||||||
|
providerConfig,
|
||||||
|
providerConfig.getTargets().getRegionalSetConfig().get()
|
||||||
|
).toSet()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2,7 +2,7 @@ package com.rak.service.extract
|
|||||||
|
|
||||||
import com.rak.config.model.ProviderConfig
|
import com.rak.config.model.ProviderConfig
|
||||||
import com.rak.config.model.ScrapeTargetFieldConfig
|
import com.rak.config.model.ScrapeTargetFieldConfig
|
||||||
import com.rak.config.model.SetScrapeTargetConfig
|
import com.rak.config.model.RegionalSetScrapeTargetConfig
|
||||||
import com.rak.config.model.SourcesConfig
|
import com.rak.config.model.SourcesConfig
|
||||||
import com.rak.model.card.CardPrint
|
import com.rak.model.card.CardPrint
|
||||||
import com.rak.model.exception.NotImplementedException
|
import com.rak.model.exception.NotImplementedException
|
||||||
@@ -14,9 +14,9 @@ import org.jsoup.nodes.Element
|
|||||||
class RegionalSetExtractionService(
|
class RegionalSetExtractionService(
|
||||||
private val cardPrintExtractionService: CardPrintExtractionService,
|
private val cardPrintExtractionService: CardPrintExtractionService,
|
||||||
private val sourcesConfig: SourcesConfig
|
private val sourcesConfig: SourcesConfig
|
||||||
) : AbstractExtractionService<RegionalSet, SetScrapeTargetConfig>() {
|
) : AbstractExtractionService<RegionalSet, RegionalSetScrapeTargetConfig>() {
|
||||||
|
|
||||||
override fun SetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
override fun RegionalSetScrapeTargetConfig.getItems(): Map<String, ScrapeTargetFieldConfig> {
|
||||||
return mapOf(
|
return mapOf(
|
||||||
Pair("prefix", this.getIdConfig()),
|
Pair("prefix", this.getIdConfig()),
|
||||||
Pair("regionCode", this.getRegionKeyConfig()),
|
Pair("regionCode", this.getRegionKeyConfig()),
|
||||||
@@ -27,7 +27,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extract(
|
override fun extract(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): RegionalSet {
|
): RegionalSet {
|
||||||
throw NotImplementedException("Not implemented")
|
throw NotImplementedException("Not implemented")
|
||||||
}
|
}
|
||||||
@@ -35,7 +35,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extractMultiple(
|
override fun extractMultiple(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): List<RegionalSet> {
|
): List<RegionalSet> {
|
||||||
val regionalSetList = extractMulti(element, extractionConfig)
|
val regionalSetList = extractMulti(element, extractionConfig)
|
||||||
|
|
||||||
@@ -55,7 +55,7 @@ class RegionalSetExtractionService(
|
|||||||
override fun extractNestedMultiples(
|
override fun extractNestedMultiples(
|
||||||
element: Element,
|
element: Element,
|
||||||
providerConfig: ProviderConfig,
|
providerConfig: ProviderConfig,
|
||||||
extractionConfig: SetScrapeTargetConfig
|
extractionConfig: RegionalSetScrapeTargetConfig
|
||||||
): List<List<RegionalSet>> {
|
): List<List<RegionalSet>> {
|
||||||
throw NotImplementedException("Not implemented")
|
throw NotImplementedException("Not implemented")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,6 +30,16 @@ scraper:
|
|||||||
url-pattern: "https://yugioh.fandom.com/wiki/%s"
|
url-pattern: "https://yugioh.fandom.com/wiki/%s"
|
||||||
targets:
|
targets:
|
||||||
set:
|
set:
|
||||||
|
root:
|
||||||
|
type: css
|
||||||
|
value: "aside > .pi-title"
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
extractors:
|
||||||
|
- steps:
|
||||||
|
- type: xpath
|
||||||
|
value: "//h2/text()"
|
||||||
|
regional-set:
|
||||||
root:
|
root:
|
||||||
type: css
|
type: css
|
||||||
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
|
||||||
@@ -80,6 +90,12 @@ scraper:
|
|||||||
- steps:
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "./td/span/text()"
|
value: "./td/span/text()"
|
||||||
|
transform:
|
||||||
|
- name: "regexReplace"
|
||||||
|
parameters: [
|
||||||
|
" .+",
|
||||||
|
""
|
||||||
|
]
|
||||||
validation:
|
validation:
|
||||||
pattern: "^.+-.+\\\\d.+$"
|
pattern: "^.+-.+\\\\d.+$"
|
||||||
name:
|
name:
|
||||||
|
|||||||
Reference in New Issue
Block a user