Compare commits

..

4 Commits

Author SHA1 Message Date
7860819029 Add CI/CD 2025-07-15 19:14:54 +02:00
304490b52e Correct YGO Fandom name transformation regex 2025-07-06 15:05:51 +02:00
ce5b87c34e Minor model adjustments 2025-07-01 12:54:56 +02:00
a9f6efc818 Minor config adjustment 2025-07-01 12:54:32 +02:00
11 changed files with 81 additions and 42 deletions

View File

@@ -0,0 +1,32 @@
name: Create and Push Release
on:
workflow_dispatch:
env:
AUTHENTIK_URL: https://auth.smoothbrain.win
REGISTRY_URL: gitea.smoothbrain.win
IMAGE_OWNER: rak
IMAGE_NAME: dex-scraper-java
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup JDK
uses: https://gitea.smoothbrain.win/rak/setup-java@main
with:
distribution: 'corretto'
java-version: '21.0.6'
cache: 'gradle'
- name: Build & Push Image
env:
QUARKUS_CONTAINER_IMAGE_USERNAME: ${{ secrets.CI_SERVICE_ACCOUNT }}
QUARKUS_CONTAINER_IMAGE_PASSWORD: ${{ secrets.CI_SERVICE_ACCOUNT_PASSWORD }}
run: |
./gradlew clean build \
-Dquarkus.container-image.push=true

View File

@@ -22,6 +22,7 @@ dependencies {
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization") implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
implementation("io.quarkus:quarkus-rest-jackson") implementation("io.quarkus:quarkus-rest-jackson")
implementation("io.quarkus:quarkus-kotlin") implementation("io.quarkus:quarkus-kotlin")
implementation("io.quarkus:quarkus-smallrye-fault-tolerance")
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8") implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
implementation("io.quarkus:quarkus-arc") implementation("io.quarkus:quarkus-arc")
implementation("org.jsoup:jsoup:1.20.1") implementation("org.jsoup:jsoup:1.20.1")

View File

@@ -1,11 +1,15 @@
package com.rak.config.model package com.rak.config.model
import io.smallrye.config.WithDefault
import io.smallrye.config.WithName import io.smallrye.config.WithName
import java.util.* import java.util.*
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig { interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
@WithName("type") @WithName("type")
fun getType(): String fun getType(): String
@WithName("nullable")
@WithDefault("false")
fun isNullable(): Boolean
@WithName("root") @WithName("root")
fun getRootConfig(): Optional<ExtractConfig> fun getRootConfig(): Optional<ExtractConfig>
@WithName("extractors") @WithName("extractors")

View File

@@ -1,9 +1,7 @@
package com.rak.model.card package com.rak.model.card
import com.rak.model.set.RegionalSet
data class CardPrint( data class CardPrint(
val id: String, var id: Int,
val name: String, val name: String,
val regionalName: String? = null, val regionalName: String? = null,
val rarity: String val rarity: String
@@ -11,10 +9,17 @@ data class CardPrint(
companion object { companion object {
fun fromMap(map: Map<String, String>): CardPrint { fun fromMap(map: Map<String, String>): CardPrint {
val regionalNameValue = map["regionalName"]
val regionalName = if (regionalNameValue == "") {
null
} else {
regionalNameValue
}
return CardPrint( return CardPrint(
map["id"] ?: throw IllegalStateException("Parameter 'prefix' not found"), map["id"]?.toInt() ?: throw IllegalStateException("Parameter 'prefix' not found"),
map["name"] ?: throw IllegalStateException("Parameter 'region' not found"), map["name"] ?: throw IllegalStateException("Parameter 'region' not found"),
map["regionalName"], regionalName,
map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"), map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
) )
} }

View File

@@ -22,28 +22,6 @@ data class RegionalSet(
) )
} }
fun flattenFromMemberLists(
idList: List<String>,
languageList: List<String>,
regionKeyAliasList: List<String>,
): MutableSet<RegionalSet> {
if (idList.size != languageList.size && idList.size != regionKeyAliasList.size) {
throw IllegalArgumentException("Lists have to be the same size")
}
val regionalSetList: MutableSet<RegionalSet> = mutableSetOf()
for (index in 0..idList.size - 1) {
regionalSetList.add(RegionalSet(
prefix = idList[index],
region = languageList[index],
regionCode = regionKeyAliasList[index],
listOf(),
numberOfCards = -1
))
}
return regionalSetList
}
} }
} }

View File

@@ -11,7 +11,7 @@ class TransformationRegistry {
init { init {
register("trim") { it.trim() } register("trim") { it.trim() }
register("removeInnerQuotes") { it.replace("\"", "") } register("removeInnerQuotes") { it.replace(Regex("^\""), "").replace(Regex("\"$"), "") }
register("replace") { input, parameters -> register("replace") { input, parameters ->
require(parameters.size == 1 || parameters.size == 2) { require(parameters.size == 1 || parameters.size == 2) {
"'replace' requires either 1 or 2 parameters" "'replace' requires either 1 or 2 parameters"

View File

@@ -46,7 +46,7 @@ class CommonCrawlService(
crawlName.indexName crawlName.indexName
)) ))
} catch (ex: RuntimeException) { } catch (ex: RuntimeException) {
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url") Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url", ex)
} }
} }

View File

@@ -54,7 +54,7 @@ class ScrapeService(
try { try {
document = Jsoup.connect(url).get() document = Jsoup.connect(url).get()
} catch(ex: Exception) { } catch(ex: Exception) {
Log.warn("Error occurred during Jsoup query") Log.warn("Error occurred during Jsoup query", ex)
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'") throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
} }
} }

View File

@@ -6,14 +6,17 @@ import io.netty.buffer.ByteBufInputStream
import io.quarkus.rest.client.reactive.ClientQueryParam import io.quarkus.rest.client.reactive.ClientQueryParam
import io.quarkus.rest.client.reactive.NotBody import io.quarkus.rest.client.reactive.NotBody
import io.quarkus.rest.client.reactive.Url import io.quarkus.rest.client.reactive.Url
import io.smallrye.faulttolerance.api.RateLimit
import jakarta.ws.rs.Consumes import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET import jakarta.ws.rs.GET
import jakarta.ws.rs.Path import jakarta.ws.rs.Path
import jakarta.ws.rs.PathParam import jakarta.ws.rs.PathParam
import jakarta.ws.rs.QueryParam import jakarta.ws.rs.QueryParam
import org.eclipse.microprofile.faulttolerance.Bulkhead
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
import java.time.temporal.ChronoUnit
@RegisterRestClient(baseUri = "whatever") @RegisterRestClient(baseUri = "whatever")
@RegisterProvider(NDJsonReader::class) @RegisterProvider(NDJsonReader::class)
@@ -23,6 +26,11 @@ interface CommonCrawlRestClient {
@ClientQueryParam(name = "output", value = ["json"]) @ClientQueryParam(name = "output", value = ["json"])
@Path("/{index}-index") @Path("/{index}-index")
@Consumes("text/x-ndjson") @Consumes("text/x-ndjson")
@RateLimit(
value = 1,
minSpacing = 5
)
@Bulkhead
fun queryIndex( fun queryIndex(
@Url @Url
baseUrl: String, baseUrl: String,

View File

@@ -130,7 +130,11 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
val extractedText = extractTextFromElementByTargetFieldConfig( val extractedText = extractTextFromElementByTargetFieldConfig(
rootElement, rootElement,
fieldConfig fieldConfig
) ?: throw ElementNotFoundException("Could not find element for '$identifier'") ) ?: if (fieldConfig.isNullable()) {
""
} else {
throw ElementNotFoundException("Could not find element for '$identifier'")
}
val mapToModify: MutableMap<String, String> = try { val mapToModify: MutableMap<String, String> = try {
resultList[index] resultList[index]
@@ -224,14 +228,8 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
} catch (ex: RuntimeException) { } catch (ex: RuntimeException) {
when (ex) { when (ex) {
is ElementNotFoundException, is ElementNotFoundException,
is IllegalStateException -> { is IllegalStateException,
// if (extractionConfig.getFallbackConfiguration().isPresent) { is ValueValidationException -> Log.debug(ex.message)
// intermediateResult = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
// } else {
// throw ex
// }
}
is ValueValidationException -> Log.warn(ex.message)
else -> throw ex else -> throw ex
} }
} }

View File

@@ -1,6 +1,15 @@
quarkus: quarkus:
container-image:
registry: gitea.smoothbrain.win
group: rak
build: true
additional-tags: latest
http: http:
port: 8081 port: 8081
live-reload:
instrumentation: true
scraper: scraper:
sources: sources:
@@ -96,6 +105,11 @@ scraper:
" .+", " .+",
"" ""
] ]
- name: "regexReplace"
parameters: [
".+-[A-Za-z]*0?",
""
]
validation: validation:
pattern: "^.+-.+\\\\d.+$" pattern: "^.+-.+\\\\d.+$"
name: name:
@@ -109,7 +123,7 @@ scraper:
transform: transform:
- name: "regexReplace" - name: "regexReplace"
parameters: [ parameters: [
"\\(.+\\)", " ?\\(.+\\)",
"" ""
] ]
- name: "removeInnerQuotes" - name: "removeInnerQuotes"
@@ -117,9 +131,8 @@ scraper:
validation: validation:
pattern: "^\".+\".*" pattern: "^\".+\".*"
regional-name: regional-name:
fallback:
default: "N/A"
type: int type: int
nullable: true
extractors: extractors:
- steps: - steps:
- type: xpath - type: xpath