Trying out model

This commit is contained in:
2025-05-30 20:46:25 +02:00
parent 4b0be3bd4e
commit 69ff62c4c0
21 changed files with 199 additions and 171 deletions

View File

@@ -1,14 +0,0 @@
package com.rak.model.config;
import org.eclipse.microprofile.config.spi.Converter;
/**
 * Container for a placeholder configuration converter.
 */
public class TestConverter {

    /**
     * Stub {@link Converter} for {@link Attribute}; it performs no parsing.
     */
    public static class ActualConverter implements Converter<Attribute> {

        /**
         * Ignores the input and yields no value.
         *
         * @param value raw configuration value; not inspected
         * @return always {@code null}
         */
        @Override
        public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException {
            return null;
        }
    }
}

View File

@@ -0,0 +1,12 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Configuration mapping describing the selectors used for a card.
 *
 * Each accessor is bound, via [WithName], to the configuration key given in
 * its annotation rather than to the Kotlin function name.
 */
interface CardDefinition {

    /** Selector definition read from the `name` key. */
    @WithName("name")
    fun nameSelector(): SelectorDefinition

    /** Selector definition read from the `attack` key. */
    @WithName("attack")
    fun attackSelector(): SelectorDefinition

    /** Selector definition read from the `effect` key. */
    @WithName("effect")
    fun effectSelector(): SelectorDefinition
}

View File

@@ -0,0 +1,8 @@
package com.rak.config
import java.util.*
/**
 * Groups the per-item definitions that a source may provide.
 *
 * Both entries are wrapped in [Optional], so either may be absent.
 */
interface Items {

    /** The card definition, if one is configured. */
    fun card(): Optional<CardDefinition>

    /** The regional-set definition, if one is configured. */
    fun regionalSet(): Optional<RegionalSetDefinition>
}

View File

@@ -0,0 +1,13 @@
package com.rak.config
import com.rak.config.converter.AbstractModelDefinition
import io.smallrye.config.WithName
/**
 * Configuration mapping describing the selectors used for a regional set.
 *
 * Extends [AbstractModelDefinition] and adds three selector definitions, each
 * bound through [WithName] to the configuration key in its annotation.
 */
interface RegionalSetDefinition : AbstractModelDefinition {

    /** Selector definition read from the `id` key. */
    @WithName("id")
    fun idSelector(): SelectorDefinition

    /** Selector definition read from the `language` key. */
    @WithName("language")
    fun languageSelector(): SelectorDefinition

    /** Selector definition read from the `region-key` key. */
    @WithName("region-key")
    fun regionKeySelector(): SelectorDefinition
}

View File

@@ -0,0 +1,5 @@
package com.rak.config
/**
 * A selector expressed as a collection of [Step]s.
 */
interface SelectorDefinition {

    /**
     * The steps that make up this selector.
     *
     * NOTE(review): the declared type is [Set], which does not by itself promise
     * an iteration order. If the steps are order-dependent, confirm that the
     * configuration mapping supplies an order-preserving implementation.
     */
    fun steps(): Set<Step>
}

View File

@@ -0,0 +1,19 @@
package com.rak.config
import io.smallrye.config.WithName
import java.util.*
/**
 * Configuration mapping for a single source.
 *
 * Every accessor carries an explicit [WithName], so the configuration key is
 * the annotation value and not derived from the function name.
 */
interface SourceConfig {

    /** Value of the `id` key. */
    @WithName("id")
    fun getId(): String

    /** Value of the `name` key. */
    @WithName("name")
    fun getName(): String

    /** Value of the `domain` key. */
    @WithName("domain")
    fun getDomain(): String

    /** Values of the `url-patterns` key; empty [Optional] when the key is not set. */
    @WithName("url-patterns")
    fun getUrlPatterns(): Optional<MutableSet<String>>

    /** Item definitions read from the `selectors` key. */
    @WithName("selectors")
    fun getItems(): Items
}

View File

@@ -1,50 +1,12 @@
package com.rak.config package com.rak.config
import io.smallrye.config.ConfigMapping import io.smallrye.config.ConfigMapping
import java.util.* import io.smallrye.config.WithName
@ConfigMapping(prefix = "scraper") @ConfigMapping(prefix = "scraper")
interface SourcesConfiguration { interface SourcesConfiguration {
fun sources(): MutableList<SourceConfig> @WithName("sources")
fun getSources(): MutableList<SourceConfig>
interface SourceConfig {
fun id(): String
fun name(): String
fun domain(): String
fun urlPatterns(): Optional<MutableSet<String>>
fun selectors(): Selectors
interface Selectors {
fun card(): Optional<CardDefinition>
fun regionalSet(): Optional<RegionalSetDefinition>
interface AbstractModelDefinition {
fun root(): Optional<String>
}
interface RegionalSetDefinition : AbstractModelDefinition {
fun id(): SelectorDefinition
fun language(): SelectorDefinition
fun regionKey(): SelectorDefinition
}
interface CardDefinition {
fun name(): SelectorDefinition
fun attack(): SelectorDefinition
fun effect(): SelectorDefinition
}
interface SelectorDefinition {
fun steps(): Set<StepDefinition>
}
interface StepDefinition {
fun type(): String // e.g. css or xpath
fun value(): String
}
}
}
} }

View File

@@ -0,0 +1,13 @@
package com.rak.config
import com.rak.config.converter.TypeSelectorConverter
import com.rak.model.scrape.selector.Selector
import io.smallrye.config.WithConverter
import io.smallrye.config.WithName
/**
 * One step of a selector: a selector type together with its expression.
 */
interface Step {

    /**
     * The kind of selector, e.g. css or xpath.
     *
     * Read from the `type` key and converted to [Selector] by
     * [TypeSelectorConverter].
     */
    @WithConverter(TypeSelectorConverter::class)
    @WithName("type")
    fun selectorType(): Selector

    /** The selector expression for this step. */
    fun value(): String
}

View File

@@ -0,0 +1,9 @@
package com.rak.config.converter
import com.rak.config.Step
import io.smallrye.config.WithName
/**
 * Base mapping for model definitions that declare a root selector.
 */
interface AbstractModelDefinition {

    /** The [Step] read from the `root` key. */
    @WithName("root")
    fun rootSelector(): Step
}

View File

@@ -0,0 +1,10 @@
package com.rak.config.converter
import com.rak.model.scrape.selector.Selector
import org.eclipse.microprofile.config.spi.Converter
/**
 * Configuration [Converter] that resolves a textual selector type into a
 * [Selector] via `Selector.valueOf`.
 *
 * Matching is case-insensitive and ignores leading/trailing whitespace, so
 * values such as `css`, `CSS` and ` css ` all resolve to the same result.
 */
class TypeSelectorConverter : Converter<Selector> {

    /**
     * Converts a raw configuration value to a [Selector].
     *
     * @param value raw configuration value naming the selector type
     * @return the [Selector] whose name equals the trimmed, upper-cased [value]
     * @throws IllegalArgumentException if [value] does not name a known selector type;
     *   the message includes the offending value and the original failure is kept as cause
     */
    override fun convert(value: String): Selector {
        // Trim first: values coming from env vars or property files may carry stray whitespace.
        val normalized = value.trim().uppercase()
        return try {
            Selector.valueOf(normalized)
        } catch (e: IllegalArgumentException) {
            // Same exception type as before, but say which configured value was rejected.
            throw IllegalArgumentException("Unknown selector type '$value'", e)
        }
    }
}

View File

@@ -2,22 +2,21 @@ package com.rak.controller
import com.rak.config.SourcesConfiguration import com.rak.config.SourcesConfiguration
import com.rak.service.ScrapeService import com.rak.service.ScrapeService
import io.quarkus.logging.Log import com.rak.service.SourceService
import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET import jakarta.ws.rs.GET
import jakarta.ws.rs.Path import jakarta.ws.rs.Path
import jakarta.ws.rs.Produces import jakarta.ws.rs.Produces
import jakarta.ws.rs.core.MediaType import jakarta.ws.rs.core.MediaType
import org.jboss.resteasy.reactive.RestPath
import org.jboss.resteasy.reactive.RestQuery import org.jboss.resteasy.reactive.RestQuery
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
@Path("/hello") @Path("/api")
class ExampleResource( class ExampleResource(
private val sourcesConfiguration: SourcesConfiguration, private val sourcesConfiguration: SourcesConfiguration,
private val scrapeService: ScrapeService private val scrapeService: ScrapeService,
private val sourceService: SourceService
) { ) {
companion object { companion object {
@@ -25,38 +24,34 @@ class ExampleResource(
} }
@GET @GET
@Produces(MediaType.TEXT_PLAIN) @Path("/{provider}/set")
fun hello( @Produces(MediaType.APPLICATION_JSON)
@RestQuery @Consumes(MediaType.APPLICATION_JSON)
fun scrapeSet(
@RestPath
provider: String, provider: String,
@RestQuery @RestQuery
path: String setName: String
): Map<String, String> { ): Map<String, String> {
val sources = sourcesConfiguration return scrapeService.extractSet(
.sources() provider,
.filter { setName
it.id().equals(provider, ignoreCase = true) )
} }
val source = sources.firstOrNull() ?: throw IllegalArgumentException("Provider $provider not found") @GET
@Path("/{provider}/card")
val newPath: String = path @Produces(MediaType.APPLICATION_JSON)
.trim() @Consumes(MediaType.APPLICATION_JSON)
.replace(" ", "_") fun scrapeCard(
@RestPath
val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get() provider: String,
@RestQuery
val regionalSetSelector = source.selectors().regionalSet().get() cardName: String
val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!! ): Map<String, String> {
return scrapeService.extractCard(
val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps()) provider,
val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps()) cardName
val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps())
return mapOf(
Pair("id", setId ?: "N/A"),
Pair("language", setLanguage ?: "N/A"),
Pair("key", setKey ?: "N/A"),
) )
} }
} }

View File

@@ -1,5 +0,0 @@
package com.rak.model.config
/**
 * Value holder for the [Step]s belonging to one attribute.
 *
 * @property steps the steps associated with this attribute
 */
data class Attribute(val steps: Set<Step>)

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * [ConfigModel] implementation holding the attributes of a card.
 *
 * @property root the root value required by [ConfigModel]
 * @property name attribute for the card name
 * @property attack attribute for the card attack
 * @property effect attribute for the card effect
 */
data class CardConfigModel(
    override val root: String,
    val name: Attribute,
    val attack: Attribute,
    val effect: Attribute,
) : ConfigModel

View File

@@ -1,5 +0,0 @@
package com.rak.model.config
/**
 * Common contract for configuration models: each one exposes a [root] string.
 */
interface ConfigModel {

    /** The root value of this model. */
    val root: String
}

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * [ConfigModel] implementation holding the attributes of a regional set.
 *
 * @property root the root value required by [ConfigModel]
 * @property id attribute for the set id
 * @property language attribute for the set language
 * @property regionKey attribute for the set region key
 */
data class RegionalSetConfigModel(
    override val root: String,
    val id: Attribute,
    val language: Attribute,
    val regionKey: Attribute,
) : ConfigModel

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * Immutable description of a source.
 *
 * @property id identifier of the source
 * @property name name of the source
 * @property urlPatterns URL patterns associated with the source
 * @property selectors the [ConfigModel]s configured for the source
 */
data class SourceConfiguration(
    val id: String,
    val name: String,
    val urlPatterns: Set<String>,
    val selectors: Set<ConfigModel>,
)

View File

@@ -1,11 +0,0 @@
package com.rak.model.config
/**
 * A single selector step: a [Type] paired with its expression.
 *
 * @property type the kind of selector this step uses
 * @property value the selector expression
 */
data class Step(val type: Type, val value: String) {

    /** Supported selector kinds. */
    enum class Type { XPATH, CSS }
}

View File

@@ -1,20 +1,26 @@
package com.rak.service package com.rak.service
import com.rak.config.RegionalSetDefinition
import com.rak.config.SourcesConfiguration import com.rak.config.SourcesConfiguration
import com.rak.model.XPathTarget import com.rak.config.Step
import com.rak.util.XPathUtil import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
import org.jsoup.select.Evaluator
import java.util.concurrent.LinkedBlockingQueue
@ApplicationScoped @ApplicationScoped
class ScrapeService { class ScrapeService(
private val sourceService: SourceService
) {
fun extractTextFromRootBySteps( companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
private fun extractTextFromRootBySteps(
root: Element, root: Element,
steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition> steps: Set<Step>
): String? { ): String? {
var currentElement: Element? = root.clone() var currentElement: Element? = root.clone()
var result: String? = null var result: String? = null
@@ -37,4 +43,51 @@ class ScrapeService {
return result return result
} }
fun extractSet(
provider: String,
setName: String,
): Map<String, String> {
val source =
sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
val regionalSetSelector = source.getItems().regionalSet().get()
val regionalSetRoot = document.selectFirst(regionalSetSelector.rootSelector().value())!!
val setId: String? = extractTextFromRootBySteps(
regionalSetRoot,
regionalSetSelector.idSelector().steps()
)
val setLanguage: String? = extractTextFromRootBySteps(
regionalSetRoot,
regionalSetSelector.languageSelector().steps()
)
val setKey: String? = extractTextFromRootBySteps(
regionalSetRoot,
regionalSetSelector.regionKeySelector().steps()
)
return mapOf(
Pair("id", setId ?: "N/A"),
Pair("language", setLanguage ?: "N/A"),
Pair("key", setKey ?: "N/A"),
)
}
fun extractCard(
provider: String,
cardName: String,
): Map<String, String> {
val path: String = normalizePath(cardName)
return mapOf()
}
private fun normalizePath(path: String): String = path
.trim()
.replace(" ", "_")
} }

View File

@@ -1,31 +1,15 @@
package com.rak.service package com.rak.service
import com.rak.config.SourceConfig
import com.rak.config.SourcesConfiguration import com.rak.config.SourcesConfiguration
import com.rak.model.config.SourceConfiguration
import jakarta.annotation.PostConstruct
import jakarta.enterprise.context.ApplicationScoped import jakarta.enterprise.context.ApplicationScoped
@ApplicationScoped @ApplicationScoped
class SourceService ( class SourceService (
private val sourcesConfiguration: SourcesConfiguration val sourcesConfiguration: SourcesConfiguration
) { ) {
private val sources: MutableSet<SourceConfiguration> = mutableSetOf() fun getSources(): Set<SourceConfig> = sourcesConfiguration.getSources().toSet()
fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id }
@PostConstruct
fun init() {
sourcesConfiguration
.sources()
.forEach { source ->
val config = SourceConfiguration(
source.id(),
source.name(),
source.urlPatterns().orElse(mutableSetOf()),
setOf()
)
sources.add(config)
}
}
} }

View File

@@ -1 +1 @@
com.rak.model.config.TestConverter$ActualConverter com.rak.config.converter.TypeSelectorConverter

View File

@@ -1,3 +1,7 @@
quarkus:
http:
port: 8081
scraper: scraper:
sources: sources:
- id: konami-official - id: konami-official
@@ -25,20 +29,20 @@ scraper:
- "^https://yugioh\\.fandom\\.com/wiki/.*$" - "^https://yugioh\\.fandom\\.com/wiki/.*$"
selectors: selectors:
regional-set: regional-set:
root: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" root:
type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
id: id:
steps: steps:
- type: "xpath" - type: xpath
value: "//li/text()" value: "//li/text()"
language: language:
steps: steps:
- type: "xpath" - type: xpath
value: "//li/abbr" value: "//li/abbr"
- type: "xpath" - type: xpath
value: "//abbr/@title" value: "//abbr/@title"
region-key: region-key:
steps: steps:
- type: "xpath" - type: xpath
value: "//li/abbr/text()" value: "//li/abbr/text()"
testing:
waaa: test