Trying out model

This commit is contained in:
2025-05-30 20:46:25 +02:00
parent 4b0be3bd4e
commit 69ff62c4c0
21 changed files with 199 additions and 171 deletions

View File

@@ -1,14 +0,0 @@
package com.rak.model.config;
import org.eclipse.microprofile.config.spi.Converter;
/**
 * Container for a placeholder MicroProfile Config converter.
 */
public class TestConverter {

    /**
     * Stub {@link Converter} for {@link Attribute}: it performs no conversion
     * and hands back {@code null} for every input.
     */
    public static class ActualConverter implements Converter<Attribute> {

        /**
         * @param value raw configuration value; it is not inspected
         * @return always {@code null}
         */
        @Override
        public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException {
            return null;
        }
    }
}

View File

@@ -0,0 +1,12 @@
package com.rak.config
import io.smallrye.config.WithName
/**
 * Configuration view of the selectors used to read a card.
 *
 * Each accessor is bound through [WithName] to the configuration key given in
 * its annotation.
 */
interface CardDefinition {

    /** Selector bound to the `name` key. */
    @WithName("name")
    fun nameSelector(): SelectorDefinition

    /** Selector bound to the `attack` key. */
    @WithName("attack")
    fun attackSelector(): SelectorDefinition

    /** Selector bound to the `effect` key. */
    @WithName("effect")
    fun effectSelector(): SelectorDefinition
}

View File

@@ -0,0 +1,8 @@
package com.rak.config
import java.util.*
/**
 * Groups the optional per-item selector definitions of a source.
 *
 * Both entries are wrapped in [Optional], so either definition may be absent.
 */
interface Items {

    /** Selector definition for cards, if one is present. */
    fun card(): Optional<CardDefinition>

    /** Selector definition for regional sets, if one is present. */
    fun regionalSet(): Optional<RegionalSetDefinition>
}

View File

@@ -0,0 +1,13 @@
package com.rak.config
import com.rak.config.converter.AbstractModelDefinition
import io.smallrye.config.WithName
/**
 * Configuration view of the selectors used to read a regional set.
 *
 * Extends [AbstractModelDefinition]; each accessor declared here is bound
 * through [WithName] to the configuration key given in its annotation.
 */
interface RegionalSetDefinition : AbstractModelDefinition {

    /** Selector bound to the `id` key. */
    @WithName("id")
    fun idSelector(): SelectorDefinition

    /** Selector bound to the `language` key. */
    @WithName("language")
    fun languageSelector(): SelectorDefinition

    /** Selector bound to the `region-key` key. */
    @WithName("region-key")
    fun regionKeySelector(): SelectorDefinition
}

View File

@@ -0,0 +1,5 @@
package com.rak.config
/**
 * A selector described as a collection of [Step]s.
 */
interface SelectorDefinition {

    /** The steps that make up this selector. */
    fun steps(): Set<Step>
}

View File

@@ -0,0 +1,19 @@
package com.rak.config
import io.smallrye.config.WithName
import java.util.*
/**
 * Configuration view of a single scrape source.
 *
 * Every accessor is bound through [WithName] to the configuration key given in
 * its annotation, so the `get` prefix of the method name is not part of the key.
 */
interface SourceConfig {

    /** Value of the `id` key. */
    @WithName("id")
    fun getId(): String

    /** Value of the `name` key. */
    @WithName("name")
    fun getName(): String

    /** Value of the `domain` key. */
    @WithName("domain")
    fun getDomain(): String

    /** Value of the `url-patterns` key; the [Optional] is empty when the key is absent. */
    @WithName("url-patterns")
    fun getUrlPatterns(): Optional<MutableSet<String>>

    /** Item definitions found under the `selectors` key. */
    @WithName("selectors")
    fun getItems(): Items
}

View File

@@ -1,50 +1,12 @@
package com.rak.config
import io.smallrye.config.ConfigMapping
import java.util.*
import io.smallrye.config.WithName
@ConfigMapping(prefix = "scraper")
interface SourcesConfiguration {
fun sources(): MutableList<SourceConfig>
interface SourceConfig {
fun id(): String
fun name(): String
fun domain(): String
fun urlPatterns(): Optional<MutableSet<String>>
fun selectors(): Selectors
interface Selectors {
fun card(): Optional<CardDefinition>
fun regionalSet(): Optional<RegionalSetDefinition>
interface AbstractModelDefinition {
fun root(): Optional<String>
}
interface RegionalSetDefinition : AbstractModelDefinition {
fun id(): SelectorDefinition
fun language(): SelectorDefinition
fun regionKey(): SelectorDefinition
}
interface CardDefinition {
fun name(): SelectorDefinition
fun attack(): SelectorDefinition
fun effect(): SelectorDefinition
}
interface SelectorDefinition {
fun steps(): Set<StepDefinition>
}
interface StepDefinition {
fun type(): String // e.g. css or xpath
fun value(): String
}
}
}
@WithName("sources")
fun getSources(): MutableList<SourceConfig>
}

View File

@@ -0,0 +1,13 @@
package com.rak.config
import com.rak.config.converter.TypeSelectorConverter
import com.rak.model.scrape.selector.Selector
import io.smallrye.config.WithConverter
import io.smallrye.config.WithName
/**
 * One step of a selector: a selector type together with the expression to apply.
 */
interface Step {

    /**
     * Selector type read from the `type` key and converted by
     * [TypeSelectorConverter], e.g. css or xpath.
     */
    @WithConverter(TypeSelectorConverter::class)
    @WithName("type")
    fun selectorType(): Selector

    /** The selector expression for this step. */
    fun value(): String
}

View File

@@ -0,0 +1,9 @@
package com.rak.config.converter
import com.rak.config.Step
import io.smallrye.config.WithName
/**
 * Base for model definitions that carry a root selector.
 */
interface AbstractModelDefinition {

    /** The [Step] bound to the `root` key. */
    @WithName("root")
    fun rootSelector(): Step
}

View File

@@ -0,0 +1,10 @@
package com.rak.config.converter
import com.rak.model.scrape.selector.Selector
import org.eclipse.microprofile.config.spi.Converter
/**
 * Config [Converter] that turns a textual selector type into a [Selector].
 */
class TypeSelectorConverter : Converter<Selector> {

    /**
     * Upper-cases [value] and resolves it with `Selector.valueOf`, so the
     * configured text may be written in any letter case.
     */
    override fun convert(value: String): Selector {
        val constantName = value.uppercase()
        return Selector.valueOf(constantName)
    }
}

View File

@@ -2,22 +2,21 @@ package com.rak.controller
import com.rak.config.SourcesConfiguration
import com.rak.service.ScrapeService
import io.quarkus.logging.Log
import com.rak.service.SourceService
import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET
import jakarta.ws.rs.Path
import jakarta.ws.rs.Produces
import jakarta.ws.rs.core.MediaType
import org.jboss.resteasy.reactive.RestPath
import org.jboss.resteasy.reactive.RestQuery
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
@Path("/hello")
@Path("/api")
class ExampleResource(
private val sourcesConfiguration: SourcesConfiguration,
private val scrapeService: ScrapeService
private val scrapeService: ScrapeService,
private val sourceService: SourceService
) {
companion object {
@@ -25,38 +24,34 @@ class ExampleResource(
}
@GET
@Produces(MediaType.TEXT_PLAIN)
fun hello(
@RestQuery
@Path("/{provider}/set")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
fun scrapeSet(
@RestPath
provider: String,
@RestQuery
path: String
setName: String
): Map<String, String> {
val sources = sourcesConfiguration
.sources()
.filter {
it.id().equals(provider, ignoreCase = true)
}
return scrapeService.extractSet(
provider,
setName
)
}
val source = sources.firstOrNull() ?: throw IllegalArgumentException("Provider $provider not found")
val newPath: String = path
.trim()
.replace(" ", "_")
val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get()
val regionalSetSelector = source.selectors().regionalSet().get()
val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!!
val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps())
val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps())
val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps())
return mapOf(
Pair("id", setId ?: "N/A"),
Pair("language", setLanguage ?: "N/A"),
Pair("key", setKey ?: "N/A"),
/**
 * GET `/{provider}/card`: forwards [provider] and [cardName] to
 * `scrapeService.extractCard` and returns its result unchanged.
 *
 * NOTE(review): `@Consumes` on a GET endpoint without a request body looks
 * unnecessary - confirm before removing.
 */
@GET
@Path("/{provider}/card")
@Produces(MediaType.APPLICATION_JSON)
@Consumes(MediaType.APPLICATION_JSON)
fun scrapeCard(
    @RestPath provider: String,
    @RestQuery cardName: String
): Map<String, String> = scrapeService.extractCard(provider, cardName)
}

View File

@@ -1,5 +0,0 @@
package com.rak.model.config
/**
 * An attribute described by the set of [Step]s belonging to it.
 *
 * @property steps the steps of this attribute
 */
data class Attribute(
    val steps: Set<Step>,
)

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * [ConfigModel] holding the attributes of a card.
 *
 * @property root root value required by [ConfigModel]
 * @property name attribute for the card name
 * @property attack attribute for the card attack
 * @property effect attribute for the card effect
 */
data class CardConfigModel(
    override val root: String,
    val name: Attribute,
    val attack: Attribute,
    val effect: Attribute,
) : ConfigModel

View File

@@ -1,5 +0,0 @@
package com.rak.model.config
/**
 * Common contract of configuration models: each one exposes a [root] string.
 */
interface ConfigModel {

    /** Root value of the model. */
    val root: String
}

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * [ConfigModel] holding the attributes of a regional set.
 *
 * @property root root value required by [ConfigModel]
 * @property id attribute for the set id
 * @property language attribute for the set language
 * @property regionKey attribute for the set region key
 */
data class RegionalSetConfigModel(
    override val root: String,
    val id: Attribute,
    val language: Attribute,
    val regionKey: Attribute,
) : ConfigModel

View File

@@ -1,8 +0,0 @@
package com.rak.model.config
/**
 * Value object describing one source.
 *
 * @property id identifier of the source
 * @property name name of the source
 * @property urlPatterns URL patterns associated with the source
 * @property selectors configuration models attached to the source
 */
data class SourceConfiguration(
    val id: String,
    val name: String,
    val urlPatterns: Set<String>,
    val selectors: Set<ConfigModel>,
)

View File

@@ -1,11 +0,0 @@
package com.rak.model.config
/**
 * A single selector step: the [type] of expression and its [value].
 *
 * @property type kind of expression stored in [value]
 * @property value the expression text
 */
data class Step(
    val type: Type,
    val value: String,
) {
    /** Supported expression kinds. */
    enum class Type { XPATH, CSS }
}

View File

@@ -1,20 +1,26 @@
package com.rak.service
import com.rak.config.RegionalSetDefinition
import com.rak.config.SourcesConfiguration
import com.rak.model.XPathTarget
import com.rak.config.Step
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
import org.jsoup.select.Evaluator
import java.util.concurrent.LinkedBlockingQueue
@ApplicationScoped
class ScrapeService {
class ScrapeService(
private val sourceService: SourceService
) {
fun extractTextFromRootBySteps(
companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
private fun extractTextFromRootBySteps(
root: Element,
steps: Set<SourcesConfiguration.SourceConfig.Selectors.StepDefinition>
steps: Set<Step>
): String? {
var currentElement: Element? = root.clone()
var result: String? = null
@@ -37,4 +43,51 @@ class ScrapeService {
return result
}
/**
 * Fetches the page for [setName] from the given provider and extracts the
 * regional-set attributes from it.
 *
 * The page URL is `https://<source domain>/<normalized set name>`. The root
 * element is located first; the id, language and region-key selectors are then
 * evaluated against that root.
 *
 * @param provider id of the configured source to scrape
 * @param setName name of the set; normalized with `normalizePath` before use
 * @return a map with the keys `id`, `language` and `key`; a value that could
 *   not be extracted is reported as `"N/A"`
 * @throws IllegalArgumentException if no source is configured for [provider]
 * @throws IllegalStateException if the source has no regional-set definition,
 *   or the root selector matches nothing in the fetched page
 */
fun extractSet(
    provider: String,
    setName: String,
): Map<String, String> {
    val source = sourceService.getSourceById(provider)
        ?: throw IllegalArgumentException("Provider $provider not found")

    // Validate the configuration before doing any network I/O.
    val regionalSetSelector = source.getItems().regionalSet().orElseThrow {
        IllegalStateException("Provider $provider has no regional-set selectors configured")
    }

    val path: String = normalizePath(setName)
    val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()

    // NOTE(review): only value() of the root step is used; its selectorType() is
    // ignored, so the root is always evaluated through selectFirst - confirm.
    val rootQuery: String = regionalSetSelector.rootSelector().value()
    val regionalSetRoot = checkNotNull(document.selectFirst(rootQuery)) {
        "Root selector '$rootQuery' matched nothing for set '$setName' of provider $provider"
    }

    val setId: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.idSelector().steps()
    )
    val setLanguage: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.languageSelector().steps()
    )
    val setKey: String? = extractTextFromRootBySteps(
        regionalSetRoot,
        regionalSetSelector.regionKeySelector().steps()
    )

    return mapOf(
        "id" to (setId ?: "N/A"),
        "language" to (setLanguage ?: "N/A"),
        "key" to (setKey ?: "N/A"),
    )
}
/**
 * Placeholder for card extraction: no scraping is performed yet and the result
 * is always an empty map.
 *
 * TODO: implement card extraction.
 *
 * @param provider id of the source to scrape (currently unused)
 * @param cardName name of the card to look up (currently unused)
 * @return an empty map
 */
fun extractCard(
    provider: String,
    cardName: String,
): Map<String, String> = emptyMap()
/**
 * Turns a free-text name into a URL path segment: surrounding whitespace is
 * trimmed and every remaining space becomes an underscore.
 */
private fun normalizePath(path: String): String {
    val trimmed = path.trim()
    return trimmed.replace(" ", "_")
}
}

View File

@@ -1,31 +1,15 @@
package com.rak.service
import com.rak.config.SourceConfig
import com.rak.config.SourcesConfiguration
import com.rak.model.config.SourceConfiguration
import jakarta.annotation.PostConstruct
import jakarta.enterprise.context.ApplicationScoped
@ApplicationScoped
class SourceService (
private val sourcesConfiguration: SourcesConfiguration
val sourcesConfiguration: SourcesConfiguration
) {
private val sources: MutableSet<SourceConfiguration> = mutableSetOf()
@PostConstruct
fun init() {
sourcesConfiguration
.sources()
.forEach { source ->
val config = SourceConfiguration(
source.id(),
source.name(),
source.urlPatterns().orElse(mutableSetOf()),
setOf()
)
sources.add(config)
}
}
/** Returns the configured sources as a set built from the configuration's source list. */
fun getSources(): Set<SourceConfig> {
    val configured = sourcesConfiguration.getSources()
    return configured.toSet()
}
/**
 * Finds the first configured source whose id equals [id], ignoring letter case.
 *
 * Matching is case-insensitive so that provider names taken from a request
 * path resolve regardless of how the caller capitalised them.
 *
 * @param id source id to look up
 * @return the matching source, or `null` when none is configured
 */
fun getSourceById(id: String): SourceConfig? =
    sourcesConfiguration.getSources().firstOrNull { it.getId().equals(id, ignoreCase = true) }
}

View File

@@ -1 +1 @@
com.rak.model.config.TestConverter$ActualConverter
com.rak.config.converter.TypeSelectorConverter

View File

@@ -1,3 +1,7 @@
quarkus:
http:
port: 8081
scraper:
sources:
- id: konami-official
@@ -25,20 +29,20 @@ scraper:
- "^https://yugioh\\.fandom\\.com/wiki/.*$"
selectors:
regional-set:
root: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
root:
type: css
value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li"
id:
steps:
- type: "xpath"
- type: xpath
value: "//li/text()"
language:
steps:
- type: "xpath"
- type: xpath
value: "//li/abbr"
- type: "xpath"
- type: xpath
value: "//abbr/@title"
region-key:
steps:
- type: "xpath"
value: "//li/abbr/text()"
testing:
waaa: test
- type: xpath
value: "//li/abbr/text()"