// expeStats.js

  1. 'use strict'
  2. // import { experiments } from './experimentConfig'
  3. const config = require('./experimentConfig')
  4. const mongoose = require('mongoose')
  5. const configApp = require('./config')
  6. const mongoDatabaseURI = configApp.mongoDatabaseURI
  7. const fs = require('fs-extra')
  8. const winston = require('winston')
  9. const execSync = require('child_process').execSync
  10. // const connectDb = async () => {
  11. // await mongoose.connect(mongoDatabaseURI, { useNewUrlParser: true, useFindAndModify: false })
  12. // mongoose.connection.on('error', (err) => console.log(err))
  13. // }
  14. // get whitelist scene for MatchExtractsWithReference experiment
  15. const scenes = config.experiments.MatchExtractsWithReference.availableScenes.whitelist
  16. // File logger configuration
  17. const fileLogger = winston.createLogger({
  18. level: 'info',
  19. format: winston.format.json(),
  20. transports: [
  21. new winston.transports.File({ filename: 'logs/expeStats.log' }),
  22. new winston.transports.File({ filename: 'logs/expeStats.error.log', level: 'error' }),
  23. new winston.transports.Console({
  24. level: 'debug',
  25. handleExceptions: true,
  26. format: winston.format.json()
  27. })
  28. ],
  29. exitOnError: false
  30. })
  31. const setup = async (logToFile = false) => {
  32. // await connectDb()
  33. if (logToFile) fileLogger.info({ log: 'Start extraction of data from mongo for `MatchExtractsExperiments`.', date: new Date() })
  34. execSync('python utils/extract_experiment.py', { encoding: 'utf-8' })
  35. if (logToFile) fileLogger.info({ log: 'Mongo extraction done', date: new Date() })
  36. execSync('python utils/extract_stats_freq_and_min_all.py --file results/experiments_results.json --output results/match_extracts_stats.csv', { encoding: 'utf-8' })
  37. if (logToFile) fileLogger.info({ log: 'Stats computation done, need to create probability for each scene', date: new Date() })
  38. // read extracted stats in order to compute probabilities
  39. let statsPath = 'results/match_extracts_stats.csv'
  40. let buffer = fs.readFileSync(statsPath)
  41. let lines = buffer.toString().split('\n')
  42. let stats = {}
  43. let nUsers = 0
  44. for (let l of lines) {
  45. if (l.length > 0) {
  46. // extract data from csv file
  47. let data = l.split(';')
  48. // data[0] contains scene name
  49. // data[1] contains number of users who do this scene
  50. let u = Number(data[1])
  51. stats[String(data[0])] = u
  52. nUsers += u
  53. }
  54. }
  55. // start computing probabilities
  56. let probabilities = {}
  57. let probsArr = []
  58. let nUnknownScenes = 0
  59. // based on white list
  60. for (let s of scenes) {
  61. if (s in stats) {
  62. probabilities[s] = stats[s] / nUsers
  63. probsArr.push(probabilities[s])
  64. }
  65. else {
  66. nUnknownScenes += 1
  67. }
  68. }
  69. // normalize probabilities
  70. let currentMax = Math.max(...probsArr)
  71. for (let s of scenes) {
  72. // if new scene
  73. if (!(s in stats)) {
  74. // multiply prob criteria based on number of unknown scene
  75. // => increase chance for user to pass this scene
  76. probabilities[s] = (1 + (1 - (nUnknownScenes / scenes.length))) * currentMax
  77. probsArr.push(probabilities[s])
  78. }
  79. }
  80. // get sum of current probs
  81. let sum = probsArr.reduce((a, b) => a + b, 0)
  82. for (let s of scenes) {
  83. probabilities[s] /= sum
  84. }
  85. if (logToFile) fileLogger.info({ log: 'New probabilities extracted:' + JSON.stringify(probabilities, null, 3), date: new Date() })
  86. fs.writeFile('results/match_extracts_probs.json', JSON.stringify(probabilities, null, 3))
  87. }
  88. // Execute setup command
  89. setup()
  90. module.exports = { setup, expeStatsServiceLogger: fileLogger }