diff --git a/src/main/java/com/rak/model/config/TestConverter.java b/src/main/java/com/rak/model/config/TestConverter.java deleted file mode 100644 index 0651144..0000000 --- a/src/main/java/com/rak/model/config/TestConverter.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.rak.model.config; - -import org.eclipse.microprofile.config.spi.Converter; - -public class TestConverter { - - public static class ActualConverter implements Converter { - @Override - public Attribute convert(final String value) throws IllegalArgumentException, NullPointerException { - return null; - } - } - -} diff --git a/src/main/kotlin/com/rak/config/CardDefinition.kt b/src/main/kotlin/com/rak/config/CardDefinition.kt new file mode 100644 index 0000000..e199537 --- /dev/null +++ b/src/main/kotlin/com/rak/config/CardDefinition.kt @@ -0,0 +1,12 @@ +package com.rak.config + +import io.smallrye.config.WithName + +interface CardDefinition { + @WithName("name") + fun nameSelector(): SelectorDefinition + @WithName("attack") + fun attackSelector(): SelectorDefinition + @WithName("effect") + fun effectSelector(): SelectorDefinition +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/Items.kt b/src/main/kotlin/com/rak/config/Items.kt new file mode 100644 index 0000000..31dadb6 --- /dev/null +++ b/src/main/kotlin/com/rak/config/Items.kt @@ -0,0 +1,8 @@ +package com.rak.config + +import java.util.* + +interface Items { + fun card(): Optional + fun regionalSet(): Optional +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt b/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt new file mode 100644 index 0000000..3283721 --- /dev/null +++ b/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt @@ -0,0 +1,13 @@ +package com.rak.config + +import com.rak.config.converter.AbstractModelDefinition +import io.smallrye.config.WithName + +interface RegionalSetDefinition : AbstractModelDefinition { + @WithName("id") + fun idSelector(): SelectorDefinition + @WithName("language") + fun languageSelector(): SelectorDefinition + @WithName("region-key") + fun regionKeySelector(): SelectorDefinition +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/SelectorDefinition.kt b/src/main/kotlin/com/rak/config/SelectorDefinition.kt new file mode 100644 index 0000000..0c01ba9 --- /dev/null +++ b/src/main/kotlin/com/rak/config/SelectorDefinition.kt @@ -0,0 +1,5 @@ +package com.rak.config + +interface SelectorDefinition { + fun steps(): Set +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/SourceConfig.kt b/src/main/kotlin/com/rak/config/SourceConfig.kt new file mode 100644 index 0000000..c210a2c --- /dev/null +++ b/src/main/kotlin/com/rak/config/SourceConfig.kt @@ -0,0 +1,19 @@ +package com.rak.config + +import io.smallrye.config.WithName +import java.util.* + +interface SourceConfig { + + @WithName("id") + fun getId(): String + @WithName("name") + fun getName(): String + @WithName("domain") + fun getDomain(): String + @WithName("url-patterns") + fun getUrlPatterns(): Optional> + @WithName("selectors") + fun getItems(): Items + +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/SourcesConfiguration.kt b/src/main/kotlin/com/rak/config/SourcesConfiguration.kt index d775966..fb2abe7 100644 --- a/src/main/kotlin/com/rak/config/SourcesConfiguration.kt +++ b/src/main/kotlin/com/rak/config/SourcesConfiguration.kt @@ -1,50 +1,12 @@ package com.rak.config import io.smallrye.config.ConfigMapping -import java.util.* - +import io.smallrye.config.WithName @ConfigMapping(prefix = "scraper") interface SourcesConfiguration { - fun sources(): MutableList - - interface SourceConfig { - fun id(): String - fun name(): String - fun domain(): String - fun urlPatterns(): Optional> - fun selectors(): Selectors - - interface Selectors { - fun card(): Optional - fun regionalSet(): Optional - - interface AbstractModelDefinition { - fun root(): Optional - } - - interface RegionalSetDefinition : AbstractModelDefinition { - fun id(): SelectorDefinition - fun language(): SelectorDefinition - fun regionKey(): SelectorDefinition - } - - interface CardDefinition { - fun name(): SelectorDefinition - fun attack(): SelectorDefinition - fun effect(): SelectorDefinition - } - - interface SelectorDefinition { - fun steps(): Set - } - - interface StepDefinition { - fun type(): String // e.g. css or xpath - fun value(): String - } - } - } + @WithName("sources") + fun getSources(): MutableList } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/Step.kt b/src/main/kotlin/com/rak/config/Step.kt new file mode 100644 index 0000000..f0ceea8 --- /dev/null +++ b/src/main/kotlin/com/rak/config/Step.kt @@ -0,0 +1,13 @@ +package com.rak.config + +import com.rak.config.converter.TypeSelectorConverter +import com.rak.model.scrape.selector.Selector +import io.smallrye.config.WithConverter +import io.smallrye.config.WithName + +interface Step { + @WithConverter(TypeSelectorConverter::class) + @WithName("type") + fun selectorType(): Selector // e.g. css or xpath + fun value(): String +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt b/src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt new file mode 100644 index 0000000..77bcc6d --- /dev/null +++ b/src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt @@ -0,0 +1,9 @@ +package com.rak.config.converter + +import com.rak.config.Step +import io.smallrye.config.WithName + +interface AbstractModelDefinition { + @WithName("root") + fun rootSelector(): Step +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/converter/TypeSelectorConverter.kt b/src/main/kotlin/com/rak/config/converter/TypeSelectorConverter.kt new file mode 100644 index 0000000..4bf2fcf --- /dev/null +++ b/src/main/kotlin/com/rak/config/converter/TypeSelectorConverter.kt @@ -0,0 +1,10 @@ +package com.rak.config.converter + +import com.rak.model.scrape.selector.Selector +import org.eclipse.microprofile.config.spi.Converter + +class TypeSelectorConverter : Converter { + override fun convert(value: String): Selector { + return Selector.valueOf(value.uppercase()) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/controller/ExampleResource.kt b/src/main/kotlin/com/rak/controller/ExampleResource.kt index 0b9f060..46612e5 100644 --- a/src/main/kotlin/com/rak/controller/ExampleResource.kt +++ b/src/main/kotlin/com/rak/controller/ExampleResource.kt @@ -2,22 +2,21 @@ package com.rak.controller import com.rak.config.SourcesConfiguration import com.rak.service.ScrapeService -import io.quarkus.logging.Log +import com.rak.service.SourceService +import jakarta.ws.rs.Consumes import jakarta.ws.rs.GET import jakarta.ws.rs.Path import jakarta.ws.rs.Produces import jakarta.ws.rs.core.MediaType +import org.jboss.resteasy.reactive.RestPath import org.jboss.resteasy.reactive.RestQuery -import org.jsoup.Jsoup -import org.jsoup.nodes.Document -import org.jsoup.nodes.Element -import org.jsoup.nodes.TextNode -@Path("/hello") +@Path("/api") class ExampleResource( private val sourcesConfiguration: SourcesConfiguration, - private val scrapeService: ScrapeService + private val scrapeService: ScrapeService, + private val sourceService: SourceService ) { companion object { @@ -25,38 +24,34 @@ class ExampleResource( } @GET - @Produces(MediaType.TEXT_PLAIN) - fun hello( - @RestQuery + @Path("/{provider}/set") + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + fun scrapeSet( + @RestPath provider: String, @RestQuery - path: String + setName: String ): Map { - val sources = sourcesConfiguration - .sources() - .filter { - it.id().equals(provider, ignoreCase = true) - } + return scrapeService.extractSet( + provider, + setName + ) + } - val source = sources.firstOrNull() ?: throw IllegalArgumentException("Provider $provider not found") - - val newPath: String = path - .trim() - .replace(" ", "_") - - val doc: Document = Jsoup.connect("https://${source.domain()}/$newPath").get() - - val regionalSetSelector = source.selectors().regionalSet().get() - val regionalSetRoot = doc.selectFirst(regionalSetSelector.root().get())!! - - val setId: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().id().steps()) - val setLanguage: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().language().steps()) - val setKey: String? = scrapeService.extractTextFromRootBySteps(regionalSetRoot, source.selectors().regionalSet().get().regionKey().steps()) - - return mapOf( - Pair("id", setId ?: "N/A"), - Pair("language", setLanguage ?: "N/A"), - Pair("key", setKey ?: "N/A"), + @GET + @Path("/{provider}/card") + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + fun scrapeCard( + @RestPath + provider: String, + @RestQuery + cardName: String + ): Map { + return scrapeService.extractCard( + provider, + cardName ) } } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/config/Attribute.kt b/src/main/kotlin/com/rak/model/config/Attribute.kt deleted file mode 100644 index 072e681..0000000 --- a/src/main/kotlin/com/rak/model/config/Attribute.kt +++ /dev/null @@ -1,5 +0,0 @@ -package com.rak.model.config - -data class Attribute( - val steps: Set -) \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/config/CardConfigModel.kt b/src/main/kotlin/com/rak/model/config/CardConfigModel.kt deleted file mode 100644 index 573bce5..0000000 --- a/src/main/kotlin/com/rak/model/config/CardConfigModel.kt +++ /dev/null @@ -1,8 +0,0 @@ -package com.rak.model.config - -data class CardConfigModel( - override val root: String, - val name: Attribute, - val attack: Attribute, - val effect: Attribute, -) : ConfigModel \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/config/ConfigModel.kt b/src/main/kotlin/com/rak/model/config/ConfigModel.kt deleted file mode 100644 index eabe196..0000000 --- a/src/main/kotlin/com/rak/model/config/ConfigModel.kt +++ /dev/null @@ -1,5 +0,0 @@ -package com.rak.model.config - -interface ConfigModel { - val root: String -} diff --git a/src/main/kotlin/com/rak/model/config/RegionalSetConfigModel.kt b/src/main/kotlin/com/rak/model/config/RegionalSetConfigModel.kt deleted file mode 100644 index 42693d5..0000000 --- a/src/main/kotlin/com/rak/model/config/RegionalSetConfigModel.kt +++ /dev/null @@ -1,8 +0,0 @@ -package com.rak.model.config - -data class RegionalSetConfigModel( - override val root: String, - val id: Attribute, - val language: Attribute, - val regionKey: Attribute -) : ConfigModel \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/config/SourceConfiguration.kt b/src/main/kotlin/com/rak/model/config/SourceConfiguration.kt deleted file mode 100644 index d470489..0000000 --- a/src/main/kotlin/com/rak/model/config/SourceConfiguration.kt +++ /dev/null @@ -1,8 +0,0 @@ -package com.rak.model.config - -data class SourceConfiguration( - val id: String, - val name: String, - val urlPatterns: Set, - val selectors: Set -) \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/config/Step.kt b/src/main/kotlin/com/rak/model/config/Step.kt deleted file mode 100644 index 679087d..0000000 --- a/src/main/kotlin/com/rak/model/config/Step.kt +++ /dev/null @@ -1,11 +0,0 @@ -package com.rak.model.config - -data class Step( - val type: Type, - val value: String -) { - enum class Type { - XPATH, - CSS - } -} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/ScrapeService.kt b/src/main/kotlin/com/rak/service/ScrapeService.kt index a7375d3..1e25082 100644 --- a/src/main/kotlin/com/rak/service/ScrapeService.kt +++ b/src/main/kotlin/com/rak/service/ScrapeService.kt @@ -1,20 +1,26 @@ package com.rak.service +import com.rak.config.RegionalSetDefinition import com.rak.config.SourcesConfiguration -import com.rak.model.XPathTarget +import com.rak.config.Step import com.rak.util.XPathUtil import jakarta.enterprise.context.ApplicationScoped +import org.jsoup.Jsoup +import org.jsoup.nodes.Document import org.jsoup.nodes.Element -import org.jsoup.nodes.TextNode -import org.jsoup.select.Evaluator -import java.util.concurrent.LinkedBlockingQueue @ApplicationScoped -class ScrapeService { +class ScrapeService( + private val sourceService: SourceService +) { - fun extractTextFromRootBySteps( + companion object { + private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$") + } + + private fun extractTextFromRootBySteps( root: Element, - steps: Set + steps: Set ): String? { var currentElement: Element? = root.clone() var result: String? = null @@ -37,4 +43,51 @@ class ScrapeService { return result } + fun extractSet( + provider: String, + setName: String, + ): Map { + val source = + sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found") + + + val path: String = normalizePath(setName) + val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get() + val regionalSetSelector = source.getItems().regionalSet().get() + + val regionalSetRoot = document.selectFirst(regionalSetSelector.rootSelector().value())!! + + val setId: String? = extractTextFromRootBySteps( + regionalSetRoot, + regionalSetSelector.idSelector().steps() + ) + val setLanguage: String? = extractTextFromRootBySteps( + regionalSetRoot, + regionalSetSelector.languageSelector().steps() + ) + val setKey: String? = extractTextFromRootBySteps( + regionalSetRoot, + regionalSetSelector.regionKeySelector().steps() + ) + + return mapOf( + Pair("id", setId ?: "N/A"), + Pair("language", setLanguage ?: "N/A"), + Pair("key", setKey ?: "N/A"), + ) + } + + + fun extractCard( + provider: String, + cardName: String, + ): Map { + val path: String = normalizePath(cardName) + return mapOf() + } + + private fun normalizePath(path: String): String = path + .trim() + .replace(" ", "_") + } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/service/SourceService.kt b/src/main/kotlin/com/rak/service/SourceService.kt index 870a528..d638f14 100644 --- a/src/main/kotlin/com/rak/service/SourceService.kt +++ b/src/main/kotlin/com/rak/service/SourceService.kt @@ -1,31 +1,15 @@ package com.rak.service +import com.rak.config.SourceConfig import com.rak.config.SourcesConfiguration -import com.rak.model.config.SourceConfiguration -import jakarta.annotation.PostConstruct import jakarta.enterprise.context.ApplicationScoped @ApplicationScoped class SourceService ( - private val sourcesConfiguration: SourcesConfiguration + val sourcesConfiguration: SourcesConfiguration ) { - private val sources: MutableSet = mutableSetOf() - - @PostConstruct - fun init() { - sourcesConfiguration - .sources() - .forEach { source -> - val config = SourceConfiguration( - source.id(), - source.name(), - source.urlPatterns().orElse(mutableSetOf()), - setOf() - ) - - sources.add(config) - } - } + fun getSources(): Set = sourcesConfiguration.getSources().toSet() + fun getSourceById(id: String): SourceConfig? = getSources().firstOrNull { it.getId() == id } } \ No newline at end of file diff --git a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter index 301a868..c2a4dbc 100644 --- a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter +++ b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter @@ -1 +1 @@ -com.rak.model.config.TestConverter$ActualConverter \ No newline at end of file +com.rak.config.converter.TypeSelectorConverter \ No newline at end of file diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 064a9fb..266bb17 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,3 +1,7 @@ +quarkus: + http: + port: 8081 + scraper: sources: - id: konami-official @@ -25,20 +29,20 @@ scraper: - "^https://yugioh\\.fandom\\.com/wiki/.*$" selectors: regional-set: - root: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" + root: + type: css + value: "h3:contains(Prefix(es)) + div > ul:nth-child(1) > li" id: steps: - - type: "xpath" + - type: xpath value: "//li/text()" language: steps: - - type: "xpath" + - type: xpath value: "//li/abbr" - - type: "xpath" + - type: xpath value: "//abbr/@title" region-key: steps: - - type: "xpath" - value: "//li/abbr/text()" - testing: - waaa: test \ No newline at end of file + - type: xpath + value: "//li/abbr/text()" \ No newline at end of file